Projects >> nutch >>746c40d935bc57b5de58e51d6294166b39fada1a

Chunk
Conflicting content
  @Override
  public void setPageRetrySchedule(String url, WebPage page,
          long prevFetchTime, long prevModifiedTime, long fetchTime) {
<<<<<<< HEAD
    datum.setFetchTime(fetchTime + (long)SECONDS_PER_DAY*1000);
    datum.setRetriesSinceFetch(datum.getRetriesSinceFetch() + 1);
    return datum;
=======
    page.setFetchTime(fetchTime + SECONDS_PER_DAY * 1000L);
    page.setRetriesSinceFetch(page.getRetriesSinceFetch() + 1);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  }

  /**
Solution content
  }
  @Override
  public void setPageRetrySchedule(String url, WebPage page,
          long prevFetchTime, long prevModifiedTime, long fetchTime) {
    page.setFetchTime(fetchTime + SECONDS_PER_DAY * 1000L);
    page.setRetriesSinceFetch(page.getRetriesSinceFetch() + 1);

  /**
File
AbstractFetchSchedule.java
Developer's decision
Version 2
Kind of conflict
Method invocation
Return statement
Variable
Chunk
Conflicting content
    // pages are never truly GONE - we have to check them from time to time.
    // pages with too long fetchInterval are adjusted so that they fit within
    // maximum fetchInterval (segment retention period).
<<<<<<< HEAD
    if (datum.getFetchTime() - curTime > (long) maxInterval * 1000) {
      if (datum.getFetchInterval() > maxInterval) {
        datum.setFetchInterval(maxInterval * 0.9f);
      }
      datum.setFetchTime(curTime);
=======
    long fetchTime = page.getFetchTime();
    if (fetchTime - curTime > maxInterval * 1000L) {
      if (page.getFetchInterval() > maxInterval) {
        page.setFetchInterval(Math.round(maxInterval * 0.9f));
      }
      page.setFetchTime(curTime);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
    }
    return fetchTime <= curTime;
  }
Solution content
    // pages are never truly GONE - we have to check them from time to time.
    // pages with too long fetchInterval are adjusted so that they fit within
    // maximum fetchInterval (segment retention period).
    long fetchTime = page.getFetchTime();
    if (fetchTime - curTime > maxInterval * 1000L) {
      if (page.getFetchInterval() > maxInterval) {
        page.setFetchInterval(Math.round(maxInterval * 0.9f));
      }
      page.setFetchTime(curTime);
    }
    return fetchTime <= curTime;
  }
File
AbstractFetchSchedule.java
Developer's decision
Version 2
Kind of conflict
If statement
Method invocation
Variable
Chunk
Conflicting content
    if (datum.getFetchInterval() == 0 ) {
          long fetchTime, long modifiedTime, int state) {
    super.setFetchSchedule(url, page, prevFetchTime, prevModifiedTime,
        fetchTime, modifiedTime, state);
<<<<<<< HEAD
      datum.setFetchInterval(defaultInterval);
    }
    datum.setFetchTime(fetchTime + (long)datum.getFetchInterval() * 1000);
    datum.setModifiedTime(modifiedTime);
    return datum;
=======
    page.setFetchTime(fetchTime + page.getFetchInterval() * 1000L);
    page.setModifiedTime(modifiedTime);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  }

}
Solution content
          long fetchTime, long modifiedTime, int state) {
    super.setFetchSchedule(url, page, prevFetchTime, prevModifiedTime,
        fetchTime, modifiedTime, state);
    page.setFetchTime(fetchTime + page.getFetchInterval() * 1000L);
    page.setModifiedTime(modifiedTime);
  }

}
File
DefaultFetchSchedule.java
Developer's decision
Version 2
Kind of conflict
If statement
Method invocation
Return statement
Variable
Chunk
Conflicting content
 */
public class MD5Signature extends Signature {

<<<<<<< HEAD
  public byte[] calculate(Content content, Parse parse) {
    byte[] data = content.getContent();
    if (data == null) data = content.getUrl().getBytes();
    return MD5Hash.digest(data).getDigest();
=======
  private final static Collection FIELDS = new HashSet();

  static {
    FIELDS.add(WebPage.Field.CONTENT);
  }

  @Override
  public byte[] calculate(WebPage page) {
    byte[] data = page.getContent().array();
    if (data == null && page.getBaseUrl()!=null) data = page.getBaseUrl().getBytes();
    return MD5Hash.digest(data).getDigest();
  }

  @Override
  public Collection getFields() {
    return FIELDS;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  }
}
Solution content
 */
public class MD5Signature extends Signature {

  // Set handed out by getFields(); filled once by the static initializer below.
  // NOTE(review): raw Collection/HashSet — the type arguments may have been
  // lost when this snippet was extracted; confirm against the repository.
  private final static Collection FIELDS = new HashSet();

  static {
    FIELDS.add(WebPage.Field.CONTENT);
  }

  /**
   * Returns the MD5 digest of the page's content bytes.
   *
   * When the content's backing array is null and the page has a base URL,
   * the bytes of the base URL are digested instead.
   *
   * NOTE(review): page.getContent() is dereferenced before any null check,
   * so a page without content would fail here instead of reaching the base
   * URL fallback. If getContent() returns a java.nio.ByteBuffer, array()
   * never returns null, which would make the fallback unreachable — confirm.
   * NOTE(review): when both the array and the base URL are null, null is
   * passed to MD5Hash.digest — confirm that this is handled.
   *
   * @param page the page whose signature is calculated
   * @return the digest bytes produced by MD5Hash
   */
  @Override
  public byte[] calculate(WebPage page) {
    byte[] data = page.getContent().array();
    if (data == null && page.getBaseUrl()!=null) data = page.getBaseUrl().getBytes();
    return MD5Hash.digest(data).getDigest();
  }

  /**
   * Returns the shared FIELDS collection, which contains
   * WebPage.Field.CONTENT. The same mutable instance is returned on every
   * call (no defensive copy).
   */
  @Override
  public Collection getFields() {
    return FIELDS;
  }
}
File
MD5Signature.java
Developer's decision
Version 2
Kind of conflict
Annotation
Attribute
If statement
Method declaration
Method invocation
Method signature
Return statement
Static initializer
Variable
Chunk
Conflicting content
  static {
    CLASSES = new Class[] {
<<<<<<< HEAD
      org.apache.hadoop.io.NullWritable.class, 
      org.apache.hadoop.io.LongWritable.class,
      org.apache.hadoop.io.BytesWritable.class,
      org.apache.hadoop.io.FloatWritable.class,
      org.apache.hadoop.io.IntWritable.class,
      org.apache.hadoop.io.Text.class,
      org.apache.hadoop.io.MD5Hash.class,
      org.apache.nutch.crawl.CrawlDatum.class,
      org.apache.nutch.crawl.Inlink.class,
      org.apache.nutch.crawl.Inlinks.class,
      org.apache.nutch.crawl.MapWritable.class,
      org.apache.nutch.fetcher.FetcherOutput.class,
      org.apache.nutch.metadata.Metadata.class,
      org.apache.nutch.parse.Outlink.class,
      org.apache.nutch.parse.ParseText.class,
      org.apache.nutch.parse.ParseData.class,
      org.apache.nutch.parse.ParseImpl.class,
      org.apache.nutch.parse.ParseStatus.class,
      org.apache.nutch.protocol.Content.class,
      org.apache.nutch.protocol.ProtocolStatus.class,
=======
      org.apache.nutch.scoring.ScoreDatum.class,
      org.apache.nutch.util.WebPageWritable.class
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
    };
  }
Solution content
  static {
    CLASSES = new Class[] {
      org.apache.nutch.scoring.ScoreDatum.class,
      org.apache.nutch.util.WebPageWritable.class
    };
  }
File
NutchWritable.java
Developer's decision
Version 2
Kind of conflict
Other
Chunk
Conflicting content
   * should be discarded)
   * @throws IndexingException
   */
<<<<<<< HEAD
  NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks)
    throws IndexingException;
=======
  NutchDocument filter(NutchDocument doc, String url, WebPage page)
  throws IndexingException;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
}
Solution content
   * should be discarded)
   * @throws IndexingException
   */
  NutchDocument filter(NutchDocument doc, String url, WebPage page)
  throws IndexingException;
}
File
IndexingFilter.java
Developer's decision
Version 2
Kind of conflict
Method interface
Chunk
Conflicting content
import java.io.DataOutput;
import java.io.IOException;
import java.net.MalformedURLException;
<<<<<<< HEAD
import java.text.SimpleDateFormat;
=======
import java.util.ArrayList;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
import java.util.Iterator;
import java.util.List;
Solution content
import java.io.DataOutput;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
File
SolrDeleteDuplicates.java
Developer's decision
Version 2
Kind of conflict
Import
Chunk
Conflicting content
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.util.NutchConfiguration;
<<<<<<< HEAD
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.TimingUtil;
=======
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
Solution content
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
File
SolrDeleteDuplicates.java
Developer's decision
Version 2
Kind of conflict
Import
Chunk
Conflicting content
      return docBegin;
    }

<<<<<<< HEAD
    public int getNumDocs() {
      return numDocs;
    }

=======
    @Override
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
    public long getLength() throws IOException {
      return numDocs;
    }
Solution content
      return docBegin;
    }

    /**
     * Reports numDocs as the length of this object.
     *
     * NOTE(review): the value is a document count rather than a byte size —
     * presumably the framework only uses it as a relative size; confirm.
     *
     * @return numDocs, widened to long
     */
    @Override
    public long getLength() throws IOException {
      return numDocs;
    }
File
SolrDeleteDuplicates.java
Developer's decision
Version 2
Kind of conflict
Annotation
Method declaration
Chunk
Conflicting content
  
  public static class SolrRecordReader extends RecordReader {

<<<<<<< HEAD
    public void readFields(DataInput in) throws IOException {
      docBegin = in.readInt();
      numDocs = in.readInt();
    }

    public void write(DataOutput out) throws IOException {
      out.writeInt(docBegin);
      out.writeInt(numDocs);
=======
    private int currentDoc = 0;
    private int numDocs;
    private Text text;
    private SolrRecord record;
    private SolrDocumentList solrDocs;
    
    public SolrRecordReader(SolrDocumentList solrDocs, int numDocs) {
      this.solrDocs = solrDocs;
      this.numDocs = numDocs;
    }
    
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
      text = new Text();
      record = new SolrRecord();   
    }

    @Override
    public void close() throws IOException { }

    @Override
    public float getProgress() throws IOException {
      return currentDoc / (float) numDocs;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
    }

    @Override
Solution content
  
  public static class SolrRecordReader extends RecordReader {

    private int currentDoc = 0;
    private int numDocs;
    private Text text;
    private SolrRecord record;
    private SolrDocumentList solrDocs;
    
    /**
     * Creates a reader over an already fetched list of Solr documents.
     *
     * @param solrDocs the documents to read; the reference is stored as is
     *                 (no copy is made)
     * @param numDocs  stored in the numDocs field, which getProgress() uses
     *                 as its denominator
     */
    public SolrRecordReader(SolrDocumentList solrDocs, int numDocs) {
      this.solrDocs = solrDocs;
      this.numDocs = numDocs;
    }
    
    /**
     * Allocates a fresh Text and a fresh SolrRecord for this reader.
     * Neither the split nor the context argument is read by this method.
     *
     * @param split   not used here
     * @param context not used here
     */
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
      text = new Text();
      record = new SolrRecord();   
    }

    /** Intentionally empty: nothing is released when this reader is closed. */
    @Override
    public void close() throws IOException { }

    /**
     * Returns the fraction of documents consumed so far, computed as
     * currentDoc divided by numDocs.
     *
     * NOTE(review): when numDocs is 0 and currentDoc is still 0 the result
     * is NaN (0 / 0f) — confirm whether a reader can be created for an
     * empty result list.
     */
    @Override
    public float getProgress() throws IOException {
      // The cast forces floating-point division; int / int would truncate.
      return currentDoc / (float) numDocs;
    }

    @Override
File
SolrDeleteDuplicates.java
Developer's decision
Version 2
Kind of conflict
Annotation
Attribute
Cast expression
Method declaration
Method invocation
Method signature
Return statement
Chunk
Conflicting content
      }

      final SolrDocumentList solrDocs = response.getResults();
<<<<<<< HEAD

      return new RecordReader() {

        private int currentDoc = 0;

        public void close() throws IOException { }

        public Text createKey() {
          return new Text();
        }

        public SolrRecord createValue() {
          return new SolrRecord();
        }

        public long getPos() throws IOException {
          return currentDoc;
        }

        public float getProgress() throws IOException {
          return currentDoc / (float) numDocs;
        }

        public boolean next(Text key, SolrRecord value) throws IOException {
          if (currentDoc >= numDocs) {
            return false;
          }

          SolrDocument doc = solrDocs.get(currentDoc);
          String digest = (String) doc.getFieldValue(SolrConstants.DIGEST_FIELD);
          key.set(digest);
          value.readSolrDocument(doc);

          currentDoc++;
          return true;
        }    
      };
=======
      return new SolrRecordReader(solrDocs, numDocs);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
    }
  }
Solution content
      }

      final SolrDocumentList solrDocs = response.getResults();
      return new SolrRecordReader(solrDocs, numDocs);
    }
  }
File
SolrDeleteDuplicates.java
Developer's decision
Version 2
Kind of conflict
Method invocation
Return statement
Chunk
Conflicting content
    this.conf = conf;
  }

<<<<<<< HEAD
  public void configure(JobConf job) {
=======
  @Override
  public void setup(Context job) throws IOException {
    Configuration conf = job.getConfiguration();
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
    try {
      solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
    } catch (MalformedURLException e) {
Solution content
    this.conf = conf;
  }

  @Override
  public void setup(Context job) throws IOException {
    Configuration conf = job.getConfiguration();
    try {
      solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
    } catch (MalformedURLException e) {
File
SolrDeleteDuplicates.java
Developer's decision
Version 2
Kind of conflict
Annotation
Method invocation
Method signature
Variable
Chunk
Conflicting content
  }


<<<<<<< HEAD
  public void close() throws IOException {
=======
  @Override
  public void cleanup(Context context) throws IOException {
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
    try {
      if (numDeletes > 0) {
        LOG.info("SolrDeleteDuplicates: deleting " + numDeletes + " duplicates");
Solution content
  }


  @Override
  public void cleanup(Context context) throws IOException {
    try {
      if (numDeletes > 0) {
File
SolrDeleteDuplicates.java
Developer's decision
Version 2
Kind of conflict
Annotation
Method signature
Chunk
Conflicting content
    }
  }

<<<<<<< HEAD
  public void reduce(Text key, Iterator values,
      OutputCollector output, Reporter reporter)
  throws IOException {
    SolrRecord recordToKeep = new SolrRecord(values.next());
    while (values.hasNext()) {
      SolrRecord solrRecord = values.next();
=======
  @Override
  public void reduce(Text key, Iterable values, Context context)
  throws IOException {
    Iterator iterator = values.iterator();
    SolrRecord recordToKeep = iterator.next();
    while (iterator.hasNext()) {
      SolrRecord solrRecord = iterator.next();
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
      if (solrRecord.getBoost() > recordToKeep.getBoost() ||
          (solrRecord.getBoost() == recordToKeep.getBoost() && 
              solrRecord.getTstamp() > recordToKeep.getTstamp())) {
Solution content
    }
  }

  @Override
  public void reduce(Text key, Iterable values, Context context)
  throws IOException {
    Iterator iterator = values.iterator();
    SolrRecord recordToKeep = iterator.next();
    while (iterator.hasNext()) {
      SolrRecord solrRecord = iterator.next();
      if (solrRecord.getBoost() > recordToKeep.getBoost() ||
          (solrRecord.getBoost() == recordToKeep.getBoost() && 
              solrRecord.getTstamp() > recordToKeep.getTstamp())) {
File
SolrDeleteDuplicates.java
Developer's decision
Version 2
Kind of conflict
Annotation
Method invocation
Method signature
Variable
While statement
Chunk
Conflicting content
    }
  }

<<<<<<< HEAD
  public void dedup(String solrUrl) throws IOException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("SolrDeleteDuplicates: starting at " + sdf.format(start));
=======
  public boolean dedup(String solrUrl)
  throws IOException, InterruptedException, ClassNotFoundException {
    LOG.info("SolrDeleteDuplicates: starting...");
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
    LOG.info("SolrDeleteDuplicates: Solr url: " + solrUrl);
    
    getConf().set(SolrConstants.SERVER_URL, solrUrl);
Solution content
    }
  }

  public boolean dedup(String solrUrl)
  throws IOException, InterruptedException, ClassNotFoundException {
    LOG.info("SolrDeleteDuplicates: starting...");
    LOG.info("SolrDeleteDuplicates: Solr url: " + solrUrl);
    
    getConf().set(SolrConstants.SERVER_URL, solrUrl);
File
SolrDeleteDuplicates.java
Developer's decision
Version 2
Kind of conflict
Method invocation
Method signature
Variable
Chunk
Conflicting content
    job.setMapperClass(Mapper.class);
    job.setReducerClass(SolrDeleteDuplicates.class);

<<<<<<< HEAD
    JobClient.runJob(job);

    long end = System.currentTimeMillis();
    LOG.info("SolrDeleteDuplicates: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
=======
    return job.waitForCompletion(true);    
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  }

  public int run(String[] args)
Solution content
    job.setMapperClass(Mapper.class);
    job.setReducerClass(SolrDeleteDuplicates.class);

    return job.waitForCompletion(true);    
  }

  public int run(String[] args)
File
SolrDeleteDuplicates.java
Developer's decision
Version 2
Kind of conflict
Method invocation
Return statement
Variable
Chunk
Conflicting content
  private int commitSize;

<<<<<<< HEAD
  public void open(JobConf job, String name) throws IOException {
    solr = new CommonsHttpSolrServer(job.get(SolrConstants.SERVER_URL));
    commitSize = job.getInt(SolrConstants.COMMIT_SIZE, 1000);
    solrMapping = SolrMappingReader.getInstance(job);
=======
  @Override
  public void open(TaskAttemptContext job, String name)
  throws IOException {
    Configuration conf = job.getConfiguration();
    solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
    commitSize = conf.getInt(SolrConstants.COMMIT_SIZE, 1000);
    solrMapping = SolrMappingReader.getInstance(conf);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  }

  @Override
Solution content
  private int commitSize;

  /**
   * Initializes this writer from the configuration of the given task
   * attempt: creates the Solr client for the configured server URL, reads
   * the commit size and obtains the Solr field mapping.
   *
   * @param job  task attempt context; only its Configuration is read
   * @param name not used by this method
   * @throws IOException declared by the signature
   */
  @Override
  public void open(TaskAttemptContext job, String name)
  throws IOException {
    Configuration conf = job.getConfiguration();
    // NOTE(review): conf.get(...) may yield null when the server URL is not
    // configured — confirm how CommonsHttpSolrServer reacts to that.
    solr = new CommonsHttpSolrServer(conf.get(SolrConstants.SERVER_URL));
    // 1000 is the fallback used when no commit size is configured.
    commitSize = conf.getInt(SolrConstants.COMMIT_SIZE, 1000);
    solrMapping = SolrMappingReader.getInstance(conf);
  }

  @Override
File
SolrWriter.java
Developer's decision
Version 2
Kind of conflict
Annotation
Attribute
Method invocation
Method signature
Variable
Chunk
Conflicting content
  @Override
  public void write(NutchDocument doc) throws IOException {
    final SolrInputDocument inputDoc = new SolrInputDocument();
<<<<<<< HEAD
    for(final Entry e : doc) {
      for (final Object val : e.getValue().getValues()) {
        inputDoc.addField(solrMapping.mapKey(e.getKey()), val, e.getValue().getWeight());
        String sCopy = solrMapping.mapCopyKey(e.getKey());
        if (sCopy != e.getKey()) {
        	inputDoc.addField(sCopy, val);	
=======
    for(final Entry> e : doc) {
      for (final String val : e.getValue()) {
        inputDoc.addField(solrMapping.mapKey(e.getKey()), val);
        String sCopy = solrMapping.mapCopyKey(e.getKey());
        if (sCopy != e.getKey()) {
        	inputDoc.addField(sCopy, val);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
        }
      }
    }
Solution content
  @Override
  public void write(NutchDocument doc) throws IOException {
    final SolrInputDocument inputDoc = new SolrInputDocument();
    for(final Entry> e : doc) {
      for (final String val : e.getValue()) {
        inputDoc.addField(solrMapping.mapKey(e.getKey()), val);
        String sCopy = solrMapping.mapCopyKey(e.getKey());
        if (sCopy != e.getKey()) {
        	inputDoc.addField(sCopy, val);
        }
      }
    }
File
SolrWriter.java
Developer's decision
Version 2
Kind of conflict
For statement
If statement
Method invocation
Variable
Chunk
Conflicting content
        solr.add(inputDocs);
        inputDocs.clear();
      }
<<<<<<< HEAD
      // solr.commit();
=======
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
    } catch (final SolrServerException e) {
      throw new IOException(e);
    }
Solution content
        solr.add(inputDocs);
        inputDocs.clear();
      }
    } catch (final SolrServerException e) {
      throw new IOException(e);
    }
File
SolrWriter.java
Developer's decision
Version 2
Kind of conflict
Comment
Chunk
Conflicting content
package org.apache.nutch.parse;

import java.util.ArrayList;
<<<<<<< HEAD
=======
import java.util.Collection;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
import java.util.HashMap;
import java.util.HashSet;
Solution content
package org.apache.nutch.parse;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
File
HtmlParseFilters.java
Developer's decision
Version 2
Kind of conflict
Import
Chunk
Conflicting content
  public static final String HTMLPARSEFILTER_ORDER = "htmlparsefilter.order";

  public HtmlParseFilters(Configuration conf) {
<<<<<<< HEAD
        String order = conf.get(HTMLPARSEFILTER_ORDER);
        ObjectCache objectCache = ObjectCache.get(conf);
        this.htmlParseFilters = (HtmlParseFilter[]) objectCache.getObject(HtmlParseFilter.class.getName());
        if (htmlParseFilters == null) {
          /*
           * If ordered filters are required, prepare array of filters based on
           * property
           */
          String[] orderedFilters = null;
          if (order != null && !order.trim().equals("")) {
            orderedFilters = order.split("\\s+");
          }
            HashMap filterMap =
              new HashMap();
            try {
                ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(HtmlParseFilter.X_POINT_ID);
                if (point == null)
                    throw new RuntimeException(HtmlParseFilter.X_POINT_ID + " not found.");
                Extension[] extensions = point.getExtensions();
                for (int i = 0; i < extensions.length; i++) {
                    Extension extension = extensions[i];
                    HtmlParseFilter parseFilter = (HtmlParseFilter) extension.getExtensionInstance();
                    if (!filterMap.containsKey(parseFilter.getClass().getName())) {
                        filterMap.put(parseFilter.getClass().getName(), parseFilter);
                    }
                }
                HtmlParseFilter[] htmlParseFilters = filterMap.values().toArray(new HtmlParseFilter[filterMap.size()]);
                /*
                 * If no ordered filters required, just get the filters in an
                 * indeterminate order
                 */
                if (orderedFilters == null) {
                  objectCache.setObject(HtmlParseFilter.class.getName(), htmlParseFilters);
                }
                /* Otherwise run the filters in the required order */
                else {
                  ArrayList filters = new ArrayList();
                  for (int i = 0; i < orderedFilters.length; i++) {
                    HtmlParseFilter filter = filterMap
                        .get(orderedFilters[i]);
                    if (filter != null) {
                      filters.add(filter);
                    }
                  }
                  objectCache.setObject(HtmlParseFilter.class.getName(), filters
                      .toArray(new HtmlParseFilter[filters.size()]));
                }
            } catch (PluginRuntimeException e) {
                throw new RuntimeException(e);
=======
    String order = conf.get(HTMLPARSEFILTER_ORDER);
    ObjectCache objectCache = ObjectCache.get(conf);
    this.htmlParseFilters = (HtmlParseFilter[]) objectCache.getObject(HtmlParseFilter.class.getName());
    if (htmlParseFilters == null) {
      /*
       * If ordered filters are required, prepare array of filters based on
       * property
       */
      String[] orderedFilters = null;
      if (order != null && !order.trim().equals("")) {
        orderedFilters = order.split("\\s+");
      }
      HashMap filterMap =
        new HashMap();
      try {
        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(HtmlParseFilter.X_POINT_ID);
        if (point == null)
          throw new RuntimeException(HtmlParseFilter.X_POINT_ID + " not found.");
        Extension[] extensions = point.getExtensions();
        for (int i = 0; i < extensions.length; i++) {
          Extension extension = extensions[i];
          HtmlParseFilter parseFilter = (HtmlParseFilter) extension.getExtensionInstance();
          if (!filterMap.containsKey(parseFilter.getClass().getName())) {
            filterMap.put(parseFilter.getClass().getName(), parseFilter);
          }
        }
        HtmlParseFilter[] htmlParseFilters = filterMap.values().toArray(new HtmlParseFilter[filterMap.size()]);
        /*
         * If no ordered filters required, just get the filters in an
         * indeterminate order
         */
        if (orderedFilters == null) {
          objectCache.setObject(HtmlParseFilter.class.getName(), htmlParseFilters);
        }
        /* Otherwise run the filters in the required order */
        else {
          ArrayList filters = new ArrayList();
          for (int i = 0; i < orderedFilters.length; i++) {
            HtmlParseFilter filter = filterMap
            .get(orderedFilters[i]);
            if (filter != null) {
              filters.add(filter);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
            }
          }
          objectCache.setObject(HtmlParseFilter.class.getName(), filters
Solution content
  public static final String HTMLPARSEFILTER_ORDER = "htmlparsefilter.order";

  public HtmlParseFilters(Configuration conf) {
    String order = conf.get(HTMLPARSEFILTER_ORDER);
    ObjectCache objectCache = ObjectCache.get(conf);
    this.htmlParseFilters = (HtmlParseFilter[]) objectCache.getObject(HtmlParseFilter.class.getName());
    if (htmlParseFilters == null) {
      /*
       * If ordered filters are required, prepare array of filters based on
       * property
       */
      String[] orderedFilters = null;
      if (order != null && !order.trim().equals("")) {
        orderedFilters = order.split("\\s+");
      }
      HashMap filterMap =
        new HashMap();
      try {
        ExtensionPoint point = PluginRepository.get(conf).getExtensionPoint(HtmlParseFilter.X_POINT_ID);
        if (point == null)
          throw new RuntimeException(HtmlParseFilter.X_POINT_ID + " not found.");
        Extension[] extensions = point.getExtensions();
        for (int i = 0; i < extensions.length; i++) {
          Extension extension = extensions[i];
          HtmlParseFilter parseFilter = (HtmlParseFilter) extension.getExtensionInstance();
          if (!filterMap.containsKey(parseFilter.getClass().getName())) {
            filterMap.put(parseFilter.getClass().getName(), parseFilter);
          }
        }
        HtmlParseFilter[] htmlParseFilters = filterMap.values().toArray(new HtmlParseFilter[filterMap.size()]);
        /*
         * If no ordered filters required, just get the filters in an
         * indeterminate order
         */
        if (orderedFilters == null) {
          objectCache.setObject(HtmlParseFilter.class.getName(), htmlParseFilters);
        }
        /* Otherwise run the filters in the required order */
        else {
          ArrayList filters = new ArrayList();
          for (int i = 0; i < orderedFilters.length; i++) {
            HtmlParseFilter filter = filterMap
            .get(orderedFilters[i]);
            if (filter != null) {
              filters.add(filter);
            }
          }
          objectCache.setObject(HtmlParseFilter.class.getName(), filters
File
HtmlParseFilters.java
Developer's decision
Version 2
Kind of conflict
Attribute
Cast expression
Catch clause
Comment
For statement
If statement
Method invocation
Throw statement
Try statement
Variable
Chunk
Conflicting content
package org.apache.nutch.parse;

// Commons Logging imports
<<<<<<< HEAD

=======
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.ByteBuffer;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
Solution content
package org.apache.nutch.parse;

// Commons Logging imports
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
File
ParseUtil.java
Developer's decision
Version 2
Kind of conflict
Import
Chunk
Conflicting content
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

<<<<<<< HEAD
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.protocol.Content;

=======
import org.apache.avro.util.Utf8;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.StringUtils;
import org.apache.nutch.crawl.CrawlStatus;
import org.apache.nutch.crawl.Signature;
import org.apache.nutch.crawl.SignatureFactory;
import org.apache.nutch.crawl.URLWebPage;
import org.apache.nutch.fetcher.FetcherJob;
import org.apache.nutch.net.URLFilterException;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;
import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.ParseStatus;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.TableUtil;
import org.apache.nutch.util.URLUtil;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

/**
 * A Utility class containing methods to simply perform parsing utilities such
Solution content
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.apache.avro.util.Utf8;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.StringUtils;
import org.apache.nutch.crawl.CrawlStatus;
import org.apache.nutch.crawl.Signature;
import org.apache.nutch.crawl.SignatureFactory;
import org.apache.nutch.crawl.URLWebPage;
import org.apache.nutch.fetcher.FetcherJob;
import org.apache.nutch.net.URLFilterException;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;
import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.ParseStatus;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.TableUtil;
import org.apache.nutch.util.URLUtil;

/**
 * A Utility class containing methods to simply perform parsing utilities such
File
ParseUtil.java
Developer's decision
Version 2
Kind of conflict
Import
Chunk
Conflicting content
<<<<<<< HEAD
  private ParserFactory parserFactory;
  /** Parser timeout set to 30 sec by default. Set -1 to deactivate **/
  private int MAX_PARSE_TIME = 30;
  
=======
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  /**
   *
   * @param conf
Solution content
  private ParserFactory parserFactory;
  /** Parser timeout set to 30 sec by default. Set -1 to deactivate **/
  private int MAX_PARSE_TIME = 30;
  /**
   *
   * @param conf
File
ParseUtil.java
Developer's decision
Version 1
Kind of conflict
Blank
Chunk
Conflicting content
   * @param conf
   */
  public ParseUtil(Configuration conf) {
<<<<<<< HEAD
    this.parserFactory = new ParserFactory(conf);
    MAX_PARSE_TIME=conf.getInt("parser.timeout", 30);
=======
    super(conf);
    setConf(conf);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  }

  @Override
Solution content
   * @param conf
   */
  public ParseUtil(Configuration conf) {
    // Pass the configuration to the superclass constructor.
    super(conf);
    // Apply the configuration to this instance.
    // NOTE(review): if the superclass is Hadoop's Configured, its constructor
    // presumably already invokes setConf(conf), which would make this second
    // call redundant - confirm before removing.
    setConf(conf);
  }

  @Override
File
ParseUtil.java
Developer's decision
Version 2
Kind of conflict
Attribute
Method invocation
Chunk
Conflicting content
      if (parse!=null && ParseStatusUtils.isSuccess(parse.getParseStatus())) {
        return parse;
      }
<<<<<<< HEAD
      if (MAX_PARSE_TIME!=-1)
      	parseResult = runParser(parsers[i], content);
      else 
      	parseResult = parsers[i].getParse(content);

      if (parseResult != null && !parseResult.isEmpty())
        return parseResult;
    }
   
    if (LOG.isWarnEnabled()) { 
      LOG.warn("Unable to successfully parse content " + content.getUrl() +
               " of type " + content.getContentType());
    }
    return new ParseStatus(new ParseException("Unable to successfully parse content")).getEmptyParseResult(content.getUrl(), null);
=======
    }

    LOG.warn("Unable to successfully parse content " + url +
        " of type " + contentType);
    return ParseStatusUtils.getEmptyParse(new ParseException("Unable to successfully parse content"), null);
  }
  
  private Parse runParser(Parser p, String url, WebPage page) {
	  ParseCallable pc = new ParseCallable(p, page, url);
	  FutureTask task = new FutureTask(pc);
	  Parse res = null;
	  Thread t = new Thread(task);
	  t.start();
	  try {
		  res = task.get(MAX_PARSE_TIME, TimeUnit.SECONDS);
	  } catch (TimeoutException e) {
		  LOG.warn("TIMEOUT parsing " + url + " with " + p);
	  } catch (Exception e) {
		  task.cancel(true);
		  res = null;
		  t.interrupt();
	  } finally {
		  t = null;
		  pc = null;
	  }
	  return res;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  }

  /**
Solution content
      if (parse!=null && ParseStatusUtils.isSuccess(parse.getParseStatus())) {
        return parse;
      }
    }

    LOG.warn("Unable to successfully parse content " + url +
        " of type " + contentType);
    return ParseStatusUtils.getEmptyParse(new ParseException("Unable to successfully parse content"), null);
  }
  
  /**
   * Runs a parser on a separate thread and waits at most
   * {@code MAX_PARSE_TIME} seconds for its result.
   *
   * @param p the parser to invoke
   * @param url the URL of the page; passed to the {@code ParseCallable} and
   *        used in log messages
   * @param page the page passed to the {@code ParseCallable}
   * @return the parse produced by the parser, or {@code null} if the parser
   *         timed out, failed, or the wait was interrupted
   */
  private Parse runParser(Parser p, String url, WebPage page) {
    FutureTask<Parse> task =
        new FutureTask<Parse>(new ParseCallable(p, page, url));
    Thread worker = new Thread(task);
    worker.start();
    try {
      return task.get(MAX_PARSE_TIME, TimeUnit.SECONDS);
    } catch (TimeoutException e) {
      LOG.warn("TIMEOUT parsing " + url + " with " + p);
      // Cancel with interruption so a runaway parser does not keep its
      // thread alive after we have given up on it.
      task.cancel(true);
    } catch (Exception e) {
      task.cancel(true);
      if (e instanceof InterruptedException) {
        // Preserve the caller's interrupt status instead of swallowing it.
        Thread.currentThread().interrupt();
      } else {
        LOG.warn("Error parsing " + url + " with " + p, e);
      }
    }
    return null;
  }

  /**
File
ParseUtil.java
Developer's decision
Version 2
Kind of conflict
If statement
Method invocation
Method signature
Return statement
Try statement
Variable
Chunk
Conflicting content
      }
      return redirectedPage;
    }
<<<<<<< HEAD
    
    ParseResult parseResult = null;
    if (MAX_PARSE_TIME!=-1)
    	parseResult = runParser(p, content);
    else 
    	parseResult = p.getParse(content);
    if (parseResult != null && !parseResult.isEmpty()) {
      return parseResult;
    } else {
      if (LOG.isWarnEnabled()) {
        LOG.warn("Unable to successfully parse content " + content.getUrl() +
            " of type " + content.getContentType());
      }  
      return new ParseStatus(new ParseException("Unable to successfully parse content")).getEmptyParseResult(content.getUrl(), null);
    }
  }

  private ParseResult runParser(Parser p, Content content) {
    ParseCallable pc = new ParseCallable(p, content);
    FutureTask task = new FutureTask(pc);
    ParseResult res = null;
    Thread t = new Thread(task);
    t.start();
    try {
      res = task.get(MAX_PARSE_TIME, TimeUnit.SECONDS);
    } catch (TimeoutException e) {
      LOG.warn("TIMEOUT parsing " + content.getUrl() + " with " + p);
    } catch (Exception e) {
      task.cancel(true);
      res = null;
      t.interrupt();
    } finally {
      t = null;
      pc = null;
            continue;
          if (toUrl == null) {
    }
    return res;
  }
  
=======

    Parse parse;
    try {
      parse = parse(url, page);
    } catch (final Exception e) {
      LOG.warn("Error parsing: " + url + ": " + StringUtils.stringifyException(e));
      return redirectedPage;
    }

    if (parse == null) {
      return redirectedPage;
    }

    final byte[] signature = sig.calculate(page);

    org.apache.nutch.storage.ParseStatus pstatus = parse.getParseStatus();
    page.setParseStatus(pstatus);
    if (ParseStatusUtils.isSuccess(pstatus)) {
      if (pstatus.getMinorCode() == ParseStatusCodes.SUCCESS_REDIRECT) {
        String newUrl = ParseStatusUtils.getMessage(pstatus);
        int refreshTime = Integer.parseInt(ParseStatusUtils.getArg(pstatus, 1));
        try {
          newUrl = normalizers.normalize(newUrl, URLNormalizers.SCOPE_FETCHER);
          newUrl = filters.filter(newUrl);
        } catch (URLFilterException e) {
          return redirectedPage; // TODO: is this correct
        } catch (MalformedURLException e) {
          return redirectedPage;
        }
        if (newUrl == null || newUrl.equals(url)) {
          String reprUrl = URLUtil.chooseRepr(url, newUrl,
              refreshTime < FetcherJob.PERM_REFRESH_TIME);
          WebPage newWebPage = new WebPage();
          page.setReprUrl(new Utf8(reprUrl));
          page.putToMetadata(FetcherJob.REDIRECT_DISCOVERED, TableUtil.YES_VAL);
          redirectedPage = new URLWebPage(reprUrl, newWebPage);
        }
      } else {
        page.setText(new Utf8(parse.getText()));
        page.setTitle(new Utf8(parse.getTitle()));
        ByteBuffer prevSig = page.getSignature();
        if (prevSig != null) {
          page.setPrevSignature(prevSig);
        }
        page.setSignature(ByteBuffer.wrap(signature));
        if (page.getOutlinks() != null) {
          page.getOutlinks().clear();
        }
        final Outlink[] outlinks = parse.getOutlinks();
        final int count = 0;
        String fromHost;
        if (ignoreExternalLinks) {
          try {
            fromHost = new URL(url).getHost().toLowerCase();
          } catch (final MalformedURLException e) {
            fromHost = null;
          }
        } else {
          fromHost = null;
        }
        for (int i = 0; count < maxOutlinks && i < outlinks.length; i++) {
          String toUrl = outlinks[i].getToUrl();
          try {
            toUrl = normalizers.normalize(toUrl, URLNormalizers.SCOPE_OUTLINK);
            toUrl = filters.filter(toUrl);
          } catch (final URLFilterException e) {
            continue;
          }
          catch (MalformedURLException e2){
            continue;
          }
          }
          String toHost;
          if (ignoreExternalLinks) {
            try {
              toHost = new URL(toUrl).getHost().toLowerCase();
            } catch (final MalformedURLException e) {
              toHost = null;
            }
            if (toHost == null || !toHost.equals(fromHost)) { // external links
              continue; // skip it
            }
          }

          page.putToOutlinks(new Utf8(toUrl), new Utf8(outlinks[i].getAnchor()));
        }
        Mark.PARSE_MARK.putMark(page, Mark.FETCH_MARK.checkMark(page));
      }
    }
    return redirectedPage;
  }
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
}
Solution content
      }
      return redirectedPage;
    }

    Parse parse;
    try {
      parse = parse(url, page);
    } catch (final Exception e) {
      LOG.warn("Error parsing: " + url + ": " + StringUtils.stringifyException(e));
      return redirectedPage;
    }

    if (parse == null) {
      return redirectedPage;
    }

    final byte[] signature = sig.calculate(page);

    org.apache.nutch.storage.ParseStatus pstatus = parse.getParseStatus();
    page.setParseStatus(pstatus);
    if (ParseStatusUtils.isSuccess(pstatus)) {
      if (pstatus.getMinorCode() == ParseStatusCodes.SUCCESS_REDIRECT) {
        String newUrl = ParseStatusUtils.getMessage(pstatus);
        int refreshTime = Integer.parseInt(ParseStatusUtils.getArg(pstatus, 1));
        try {
          newUrl = normalizers.normalize(newUrl, URLNormalizers.SCOPE_FETCHER);
          newUrl = filters.filter(newUrl);
        } catch (URLFilterException e) {
          return redirectedPage; // TODO: is this correct
        } catch (MalformedURLException e) {
          return redirectedPage;
        }
        if (newUrl == null || newUrl.equals(url)) {
          String reprUrl = URLUtil.chooseRepr(url, newUrl,
              refreshTime < FetcherJob.PERM_REFRESH_TIME);
          WebPage newWebPage = new WebPage();
          page.setReprUrl(new Utf8(reprUrl));
          page.putToMetadata(FetcherJob.REDIRECT_DISCOVERED, TableUtil.YES_VAL);
          redirectedPage = new URLWebPage(reprUrl, newWebPage);
        }
      } else {
        page.setText(new Utf8(parse.getText()));
        page.setTitle(new Utf8(parse.getTitle()));
        ByteBuffer prevSig = page.getSignature();
        if (prevSig != null) {
          page.setPrevSignature(prevSig);
        }
        page.setSignature(ByteBuffer.wrap(signature));
        if (page.getOutlinks() != null) {
          page.getOutlinks().clear();
        }
        final Outlink[] outlinks = parse.getOutlinks();
        final int count = 0;
        String fromHost;
        if (ignoreExternalLinks) {
          try {
            fromHost = new URL(url).getHost().toLowerCase();
          } catch (final MalformedURLException e) {
            fromHost = null;
          }
        } else {
          fromHost = null;
        }
        for (int i = 0; count < maxOutlinks && i < outlinks.length; i++) {
          String toUrl = outlinks[i].getToUrl();
          try {
            toUrl = normalizers.normalize(toUrl, URLNormalizers.SCOPE_OUTLINK);
            toUrl = filters.filter(toUrl);
          } catch (final URLFilterException e) {
            continue;
          }
          catch (MalformedURLException e2){
            continue;
          }
          if (toUrl == null) {
            continue;
          }
          String toHost;
          if (ignoreExternalLinks) {
            try {
              toHost = new URL(toUrl).getHost().toLowerCase();
            } catch (final MalformedURLException e) {
              toHost = null;
            }
            if (toHost == null || !toHost.equals(fromHost)) { // external links
              continue; // skip it
            }
          }

          page.putToOutlinks(new Utf8(toUrl), new Utf8(outlinks[i].getAnchor()));
        }
        Mark.PARSE_MARK.putMark(page, Mark.FETCH_MARK.checkMark(page));
      }
    }
    return redirectedPage;
  }
}
File
ParseUtil.java
Developer's decision
Version 2
Kind of conflict
If statement
Method declaration
Method invocation
Return statement
Try statement
Variable
Chunk
Conflicting content
      System.out.println("usage:" + usage);
      return;
    }
<<<<<<< HEAD
    Options opts = new Options();
    Configuration conf = NutchConfiguration.create();
    
    GenericOptionsParser parser =
      new GenericOptionsParser(conf, opts, argv);
    
    String[] remainingArgs = parser.getRemainingArgs();
    FileSystem fs = FileSystem.get(conf);
    
=======
    
    GenericOptionsParser optParser =
      new GenericOptionsParser(NutchConfiguration.create(), args);
    String[] argv = optParser.getRemainingArgs();
    Configuration conf = optParser.getConfiguration();

    FileSystem fs = FileSystem.get(conf);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
    try {
      int recno = Integer.parseInt(remainingArgs[0]);
      String segment = remainingArgs[1];
Solution content
      System.out.println("usage:" + usage);
      return;
    }
    
    GenericOptionsParser optParser =
      new GenericOptionsParser(NutchConfiguration.create(), args);
    String[] argv = optParser.getRemainingArgs();
    Configuration conf = optParser.getConfiguration();

    FileSystem fs = FileSystem.get(conf);
    try {
File
Content.java
Developer's decision
Version 2
Kind of conflict
Method invocation
Variable
Chunk
Conflicting content
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
<<<<<<< HEAD
=======
import org.apache.nutch.util.NutchJobConf;
import org.apache.nutch.util.TableUtil;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
import org.apache.nutch.util.TimingUtil;
import org.apache.nutch.util.URLUtil;
import org.gora.mapreduce.GoraMapper;
Solution content
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.NutchJobConf;
import org.apache.nutch.util.TableUtil;
import org.apache.nutch.util.TimingUtil;
import org.apache.nutch.util.URLUtil;
import org.gora.mapreduce.GoraMapper;
File
DomainStatistics.java
Developer's decision
Version 2
Kind of conflict
Import
Chunk
Conflicting content
    int mode = 0;
/**
 * Extracts some very basic statistics about domains from the crawldb
 */
<<<<<<< HEAD
public class DomainStatistics
extends MapReduceBase
implements Tool, Mapper,
           Reducer {

  private static final Log LOG = LogFactory.getLog(DomainStatistics.class);
  
  private static final Text FETCHED_TEXT = new Text("FETCHED");
  private static final Text NOT_FETCHED_TEXT = new Text("NOT_FETCHED");
  
  public static enum MyCounter {FETCHED, NOT_FETCHED, EMPTY_RESULT};
  
  private static final int MODE_HOST = 1;
  private static final int MODE_DOMAIN = 2;
  private static final int MODE_SUFFIX = 3;
  
  private int mode = 0;
  
  private Configuration conf;
  
  public int run(String[] args) throws IOException {
    if (args.length < 3) {
      System.out.println("usage: DomainStatistics inputDirs outDir host|domain|suffix [numOfReducer]");
      return 1;
    }
    String inputDir = args[0];
    String outputDir = args[1];
    int numOfReducers = 1;
    
    if (args.length > 3) {
      numOfReducers = Integer.parseInt(args[3]);
    }

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("DomainStatistics: starting at " + sdf.format(start));

    JobConf job = new NutchJob(getConf());
    job.setJobName("Domain statistics");

    if(args[2].equals("host"))
      mode = MODE_HOST;
    else if(args[2].equals("domain"))
      mode = MODE_DOMAIN;
    else if(args[2].equals("suffix"))
      mode = MODE_SUFFIX;
    job.setInt("domain.statistics.mode", mode);
    
    String[] inputDirsSpecs = inputDir.split(",");
    for (int i = 0; i < inputDirsSpecs.length; i++) {
      FileInputFormat.addInputPath(job, new Path(inputDirsSpecs[i]));
    }

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(DomainStatistics.class);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
			long total = 0;
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setReducerClass(DomainStatistics.class);
    job.setCombinerClass(DomainStatisticsCombiner.class);
    job.setNumReduceTasks(numOfReducers);
    
    JobClient.runJob(job);
    
    long end = System.currentTimeMillis();
    LOG.info("DomainStatistics: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
    return 0;
  }

  @Override
  public void configure(JobConf job) {
    super.configure(job);
    mode = job.getInt("domain.statistics.mode", MODE_DOMAIN);
  }
  

  public Configuration getConf() {
    return conf;
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  public void map(Text urlText, CrawlDatum datum,
      OutputCollector output, Reporter reporter)
  throws IOException {
    
    if(datum.getStatus() == CrawlDatum.STATUS_DB_FETCHED 
        || datum.getStatus() == CrawlDatum.STATUS_FETCH_SUCCESS) {
      try {
        URL url = new URL(urlText.toString());
        String out = null;
        switch (mode) {
          case MODE_HOST:
            out = url.getHost();
            break;
          case MODE_DOMAIN:
            out = URLUtil.getDomainName(url);
            break;
          case MODE_SUFFIX:
            out = URLUtil.getDomainSuffix(url).getDomain();
            break;
        }
        if(out.trim().equals("")) {
          LOG.info("url : " + url);
          reporter.incrCounter(MyCounter.EMPTY_RESULT, 1);
        }
        
        output.collect(new Text(out), new LongWritable(1));
      } catch (Exception ex) { }
      reporter.incrCounter(MyCounter.FETCHED, 1);
      output.collect(FETCHED_TEXT, new LongWritable(1));
    }
    else {
      reporter.incrCounter(MyCounter.NOT_FETCHED, 1);
      output.collect(NOT_FETCHED_TEXT, new LongWritable(1));
    }
  }

  public void reduce(Text key, Iterator values,
      OutputCollector output, Reporter reporter)
  throws IOException {
    
    long total = 0;
    
    while(values.hasNext()) {
      LongWritable val = values.next();
      total += val.get();
    }
    //invert output 
    output.collect(new LongWritable(total), key);
  }
    
  
  public static class DomainStatisticsCombiner extends MapReduceBase
  implements Reducer {

    public void reduce(Text key, Iterator values,
        OutputCollector output, Reporter reporter)
    throws IOException {
      long total = 0;
      
      while(values.hasNext()) {
        LongWritable val = values.next();
        total += val.get();
      } 
      output.collect(key, new LongWritable(total));
    }

  }

  public static void main(String[] args) throws Exception {
    ToolRunner.run(NutchConfiguration.create(), new DomainStatistics(), args);
  }
  
=======
public class DomainStatistics extends Configured implements Tool {

	private static final Log LOG = LogFactory.getLog(DomainStatistics.class);

	private static final Text FETCHED_TEXT = new Text("FETCHED");
	private static final Text NOT_FETCHED_TEXT = new Text("NOT_FETCHED");

	public static enum MyCounter {
		FETCHED, NOT_FETCHED, EMPTY_RESULT
	};

	private static final int MODE_HOST = 1;
	private static final int MODE_DOMAIN = 2;
	private static final int MODE_SUFFIX = 3;

	private Configuration conf;

	public int run(String[] args) throws IOException, ClassNotFoundException,
			InterruptedException {
		if (args.length < 3) {
			System.out
					.println("usage: DomainStatistics outDir host|domain|suffix [numOfReducer]");
			return 1;
		}
		String outputDir = args[0];
		int numOfReducers = 1;

		if (args.length > 2) {
			numOfReducers = Integer.parseInt(args[2]);
		}

		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
		long start = System.currentTimeMillis();
		LOG.info("DomainStatistics: starting at " + sdf.format(start));

		Job job = new NutchJob(getConf(), "Domain statistics");

		int mode = 0;
		if (args[1].equals("host"))
			mode = MODE_HOST;
		else if (args[1].equals("domain"))
			mode = MODE_DOMAIN;
		else if (args[1].equals("suffix"))
			mode = MODE_SUFFIX;
		job.getConfiguration().setInt("domain.statistics.mode", mode);

		DataStore store = StorageUtils.createDataStore(
				job.getConfiguration(), String.class, WebPage.class);

		Query query = store.newQuery();
		query.setFields(WebPage._ALL_FIELDS);

		GoraMapper.initMapperJob(job, query, store, Text.class, LongWritable.class,
				DomainStatisticsMapper.class, null, true);

		FileOutputFormat.setOutputPath(job, new Path(outputDir));

		job.setOutputFormatClass(TextOutputFormat.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(LongWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);

		job.setReducerClass(DomainStatisticsReducer.class);
		job.setCombinerClass(DomainStatisticsCombiner.class);
		job.setNumReduceTasks(numOfReducers);

		boolean success = job.waitForCompletion(true);

		long end = System.currentTimeMillis();
		LOG.info("DomainStatistics: finished at " + sdf.format(end)
				+ ", elapsed: " + TimingUtil.elapsedTime(start, end));

		if (!success)
			return -1;
		return 0;
	}

	public Configuration getConf() {
		return conf;
	}

	public void setConf(Configuration conf) {
		this.conf = conf;
	}

	public static class DomainStatisticsCombiner extends
			Reducer {

		@Override
		public void reduce(Text key, Iterable values,
				Context context) throws IOException, InterruptedException {

			long total = 0;

			for (LongWritable val : values)
				total += val.get();

			context.write(key, new LongWritable(total));
		}

	}

	/**
	 * Sums all counts received for a key and writes the pair inverted, i.e.
	 * with the total as the output key and the original key as the value.
	 */
	public static class DomainStatisticsReducer extends
			Reducer<Text, LongWritable, LongWritable, Text> {

		/**
		 * @param key the key whose counts are being summed
		 * @param values the partial counts for {@code key}
		 * @param context receives the single (total, key) output pair
		 */
		@Override
		public void reduce(Text key, Iterable<LongWritable> values,
				Context context) throws IOException, InterruptedException {

			long total = 0;

			for (LongWritable val : values) {
				total += val.get();
			}

			// invert output
			context.write(new LongWritable(total), key);
		}
	}

	public static class DomainStatisticsMapper extends
			GoraMapper {
		LongWritable COUNT_1 = new LongWritable(1);

		private int mode = 0;

		public DomainStatisticsMapper() {
		}

		public void setup(Context context) {
			mode = context.getConfiguration().getInt("domain.statistics.mode",
					MODE_DOMAIN);
		}

		public void close() {
		}

		@Override
		protected void map(
				String key,
				WebPage value,
				org.apache.hadoop.mapreduce.Mapper.Context context)
				throws IOException, InterruptedException {
			if (value.getStatus() == CrawlStatus.STATUS_FETCHED) {
				try {
					URL url = new URL(key.toString());
					String out = null;
					switch (mode) {
					case MODE_HOST:
						out = url.getHost();
						break;
					case MODE_DOMAIN:
						out = URLUtil.getDomainName(url);
						break;
					case MODE_SUFFIX:
						out = URLUtil.getDomainSuffix(url).getDomain();
						break;
					}
					if (out.trim().equals("")) {
						LOG.info("url : " + url);
						context.getCounter(MyCounter.EMPTY_RESULT).increment(1);
					}

					context.write(new Text(out), COUNT_1);
				} catch (Exception ex) {
				}
				context.getCounter(MyCounter.FETCHED).increment(1);
				context.write(FETCHED_TEXT, COUNT_1);
			} else {
				context.getCounter(MyCounter.FETCHED).increment(1);
				context.write(NOT_FETCHED_TEXT, COUNT_1);
			}

		}
	}

	public static void main(String[] args) throws Exception {
		ToolRunner.run(NutchConfiguration.create(), new DomainStatistics(),
				args);
	}

>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
}
Solution content
/**
 * Extracts some very basic statistics about domains from the crawldb.
 * <p>
 * The job reads {@link WebPage} rows through a Gora data store. For every
 * fetched page it counts the page under its host, domain or suffix
 * (depending on the selected mode), and it keeps overall FETCHED and
 * NOT_FETCHED totals. Results are written with {@link TextOutputFormat}.
 */
public class DomainStatistics extends Configured implements Tool {

	private static final Log LOG = LogFactory.getLog(DomainStatistics.class);

	private static final Text FETCHED_TEXT = new Text("FETCHED");
	private static final Text NOT_FETCHED_TEXT = new Text("NOT_FETCHED");

	/** Job counters maintained by the mapper. */
	public static enum MyCounter {
		FETCHED, NOT_FETCHED, EMPTY_RESULT
	};

	private static final int MODE_HOST = 1;
	private static final int MODE_DOMAIN = 2;
	private static final int MODE_SUFFIX = 3;

	private Configuration conf;

	/**
	 * Configures and runs the statistics job.
	 *
	 * @param args {@code outDir host|domain|suffix [numOfReducer]}
	 * @return 0 on success, 1 if the arguments are insufficient, -1 if the
	 *         job failed
	 */
	public int run(String[] args) throws IOException, ClassNotFoundException,
			InterruptedException {
		// Only outDir and the mode are mandatory; numOfReducer is optional.
		if (args.length < 2) {
			System.out
					.println("usage: DomainStatistics outDir host|domain|suffix [numOfReducer]");
			return 1;
		}
		String outputDir = args[0];
		int numOfReducers = 1;

		if (args.length > 2) {
			numOfReducers = Integer.parseInt(args[2]);
		}

		SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
		long start = System.currentTimeMillis();
		LOG.info("DomainStatistics: starting at " + sdf.format(start));

		Job job = new NutchJob(getConf(), "Domain statistics");

		// An unrecognised mode string leaves mode at 0; the mapper then
		// emits only the FETCHED / NOT_FETCHED totals.
		int mode = 0;
		if (args[1].equals("host")) {
			mode = MODE_HOST;
		} else if (args[1].equals("domain")) {
			mode = MODE_DOMAIN;
		} else if (args[1].equals("suffix")) {
			mode = MODE_SUFFIX;
		}
		job.getConfiguration().setInt("domain.statistics.mode", mode);

		DataStore<String, WebPage> store = StorageUtils.createDataStore(
				job.getConfiguration(), String.class, WebPage.class);

		Query<String, WebPage> query = store.newQuery();
		query.setFields(WebPage._ALL_FIELDS);

		GoraMapper.initMapperJob(job, query, store, Text.class, LongWritable.class,
				DomainStatisticsMapper.class, null, true);

		FileOutputFormat.setOutputPath(job, new Path(outputDir));

		job.setOutputFormatClass(TextOutputFormat.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(LongWritable.class);
		// NOTE(review): the reducer writes (LongWritable, Text) while the
		// declared output classes are (Text, LongWritable) - confirm whether
		// this mismatch is intentional.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);

		job.setReducerClass(DomainStatisticsReducer.class);
		job.setCombinerClass(DomainStatisticsCombiner.class);
		job.setNumReduceTasks(numOfReducers);

		boolean success = job.waitForCompletion(true);

		long end = System.currentTimeMillis();
		LOG.info("DomainStatistics: finished at " + sdf.format(end)
				+ ", elapsed: " + TimingUtil.elapsedTime(start, end));

		if (!success) {
			return -1;
		}
		return 0;
	}

	/** @return the configuration last passed to {@link #setConf} */
	@Override
	public Configuration getConf() {
		return conf;
	}

	/** @param conf the configuration used to build the job */
	@Override
	public void setConf(Configuration conf) {
		this.conf = conf;
	}

	/** Pre-aggregates the counts of a key, keeping the (key, total) order. */
	public static class DomainStatisticsCombiner extends
			Reducer<Text, LongWritable, Text, LongWritable> {

		@Override
		public void reduce(Text key, Iterable<LongWritable> values,
				Context context) throws IOException, InterruptedException {

			long total = 0;

			for (LongWritable val : values) {
				total += val.get();
			}

			context.write(key, new LongWritable(total));
		}

	}

	/** Sums the counts of a key and writes the pair inverted: (total, key). */
	public static class DomainStatisticsReducer extends
			Reducer<Text, LongWritable, LongWritable, Text> {

		@Override
		public void reduce(Text key, Iterable<LongWritable> values,
				Context context) throws IOException, InterruptedException {

			long total = 0;

			for (LongWritable val : values) {
				total += val.get();
			}

			// invert output
			context.write(new LongWritable(total), key);
		}
	}

	/**
	 * Emits a count of one per fetched page under its host, domain or suffix,
	 * plus a count of one under FETCHED or NOT_FETCHED for every page.
	 */
	public static class DomainStatisticsMapper extends
			GoraMapper<String, WebPage, Text, LongWritable> {
		final LongWritable COUNT_1 = new LongWritable(1);

		private int mode = 0;

		public DomainStatisticsMapper() {
		}

		/** Reads the grouping mode from the job configuration. */
		@Override
		public void setup(Context context) {
			mode = context.getConfiguration().getInt("domain.statistics.mode",
					MODE_DOMAIN);
		}

		public void close() {
		}

		/**
		 * @param key the page URL
		 * @param value the stored page; only its status is inspected
		 * @param context receives the counts and counter updates
		 */
		@Override
		protected void map(String key, WebPage value, Context context)
				throws IOException, InterruptedException {
			if (value.getStatus() == CrawlStatus.STATUS_FETCHED) {
				String out = statisticsKey(key);
				if (out != null) {
					if (out.trim().equals("")) {
						LOG.info("url : " + key);
						context.getCounter(MyCounter.EMPTY_RESULT).increment(1);
					}
					// Written outside any catch-all so that I/O failures
					// surface instead of being silently dropped.
					context.write(new Text(out), COUNT_1);
				}
				context.getCounter(MyCounter.FETCHED).increment(1);
				context.write(FETCHED_TEXT, COUNT_1);
			} else {
				// Unfetched pages must be counted under NOT_FETCHED.
				context.getCounter(MyCounter.NOT_FETCHED).increment(1);
				context.write(NOT_FETCHED_TEXT, COUNT_1);
			}

		}

		/**
		 * Derives the host, domain or suffix of a URL according to the mode.
		 *
		 * @return the grouping key, or {@code null} when the URL cannot be
		 *         processed or the mode is not one of the known modes
		 */
		private String statisticsKey(String pageUrl) {
			try {
				URL url = new URL(pageUrl);
				switch (mode) {
				case MODE_HOST:
					return url.getHost();
				case MODE_DOMAIN:
					return URLUtil.getDomainName(url);
				case MODE_SUFFIX:
					return URLUtil.getDomainSuffix(url).getDomain();
				default:
					return null;
				}
			} catch (Exception ignored) {
				// Best effort: a URL that cannot be parsed or resolved is
				// skipped here; the page is still counted as fetched.
				return null;
			}
		}
	}

	public static void main(String[] args) throws Exception {
		ToolRunner.run(NutchConfiguration.create(), new DomainStatistics(),
				args);
	}

}
File
DomainStatistics.java
Developer's decision
Version 2
Kind of conflict
Annotation
Attribute
Class declaration
Class signature
Enum declaration
Method declaration
Method invocation
Chunk
Conflicting content
package org.creativecommons.nutch;

<<<<<<< HEAD
import org.apache.nutch.metadata.CreativeCommons;

import org.apache.nutch.parse.Parse;

import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.hadoop.io.Text;

import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.metadata.Metadata;

import org.apache.hadoop.conf.Configuration;
=======
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.HashSet;
import java.util.StringTokenizer;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

import org.apache.avro.util.Utf8;
import org.apache.commons.logging.Log;
Solution content
package org.creativecommons.nutch;

import java.net.MalformedURLException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.HashSet;
import java.util.StringTokenizer;

import org.apache.avro.util.Utf8;
import org.apache.commons.logging.Log;
File
CCIndexingFilter.java
Developer's decision
Version 2
Kind of conflict
Import
Chunk
Conflicting content
/** Adds basic searchable fields to a document. */
public class CCIndexingFilter implements IndexingFilter {
<<<<<<< HEAD
  public static final Log LOG = LogFactory.getLog(CCIndexingFilter.class);

  /** The name of the document field we use. */
  public static String FIELD = "cc";

  private Configuration conf;

  public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks)
    throws IndexingException {
    
    Metadata metadata = parse.getData().getParseMeta();
    // index the license
    String licenseUrl = metadata.get(CreativeCommons.LICENSE_URL);
    if (licenseUrl != null) {
      if (LOG.isInfoEnabled()) {
        LOG.info("CC: indexing " + licenseUrl + " for: " + url.toString());
      }

      // add the entire license as cc:license=xxx
      addFeature(doc, "license=" + licenseUrl);

      // index license attributes extracted of the license url
      addUrlFeatures(doc, licenseUrl);
    }

    // index the license location as cc:meta=xxx
    String licenseLocation = metadata.get(CreativeCommons.LICENSE_LOCATION);
    if (licenseLocation != null) {
      addFeature(doc, "meta=" + licenseLocation);
    }

    // index the work type cc:type=xxx
    String workType = metadata.get(CreativeCommons.WORK_TYPE);
    if (workType != null) {
      addFeature(doc, workType);
    }

    return doc;
  }

  /** Add the features represented by a license URL.  Urls are of the form
   * "http://creativecommons.org/licenses/xx-xx/xx/xx", where "xx" names a
   * license feature. */
  public void addUrlFeatures(NutchDocument doc, String urlString) {
    try {
      URL url = new URL(urlString);

      // tokenize the path of the url, breaking at slashes and dashes
      StringTokenizer names = new StringTokenizer(url.getPath(), "/-");

      if (names.hasMoreTokens())
        names.nextToken();                        // throw away "licenses"

      // add a feature per component after "licenses"
      while (names.hasMoreTokens()) {
        String feature = names.nextToken();
        addFeature(doc, feature);
      }
    } catch (MalformedURLException e) {
      if (LOG.isWarnEnabled()) {
        LOG.warn("CC: failed to parse url: " + urlString + " : " + e);
      }
    }
  }
  
  private void addFeature(NutchDocument doc, String feature) {
    doc.add(FIELD, feature);
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  public Configuration getConf() {
    return this.conf;
  }
=======
	public static final Log LOG = LogFactory.getLog(CCIndexingFilter.class);

	/** The name of the document field we use. */
	public static String FIELD = "cc";

	private Configuration conf;

	private static final Collection FIELDS = new HashSet();

	static {
		FIELDS.add(WebPage.Field.BASE_URL);
		FIELDS.add(WebPage.Field.METADATA);
	}

	/**
	 * Add the features represented by a license URL. Urls are of the form
	 * "http://creativecommons.org/licenses/xx-xx/xx/xx", where "xx" names a
	 * license feature.
	 */
	public void addUrlFeatures(NutchDocument doc, String urlString) {
		try {
			URL url = new URL(urlString);

			// tokenize the path of the url, breaking at slashes and dashes
			StringTokenizer names = new StringTokenizer(url.getPath(), "/-");

			if (names.hasMoreTokens())
				names.nextToken(); // throw away "licenses"

			// add a feature per component after "licenses"
			while (names.hasMoreTokens()) {
				String feature = names.nextToken();
				addFeature(doc, feature);
			}
		} catch (MalformedURLException e) {
			if (LOG.isWarnEnabled()) {
				LOG.warn("CC: failed to parse url: " + urlString + " : " + e);
			}
		}
	}

	private void addFeature(NutchDocument doc, String feature) {
		doc.add(FIELD, feature);
	}

	public void setConf(Configuration conf) {
		this.conf = conf;
	}

	public Configuration getConf() {
		return this.conf;
	}

	@Override
	public Collection getFields() {
		return FIELDS;
	}

	@Override
	public NutchDocument filter(NutchDocument doc, String url, WebPage page)
			throws IndexingException {

		ByteBuffer blicense = page.getFromMetadata(new Utf8(
				CreativeCommons.LICENSE_URL));
		if (blicense != null) {
			String licenseUrl = new String(blicense.array());
			if (LOG.isInfoEnabled()) {
				LOG.info("CC: indexing " + licenseUrl + " for: "
						+ url.toString());
			}

			// add the entire license as cc:license=xxx
			addFeature(doc, "license=" + licenseUrl);

			// index license attributes extracted of the license url
			addUrlFeatures(doc, licenseUrl);
		}

		// index the license location as cc:meta=xxx
		ByteBuffer blicenseloc = page.getFromMetadata(new Utf8(
				CreativeCommons.LICENSE_LOCATION));
		if (blicenseloc != null) {
			String licenseLocation = new String(blicenseloc.array());
			addFeature(doc, "meta=" + licenseLocation);
		}

		// index the work type cc:type=xxx
		ByteBuffer bworkType = page.getFromMetadata(new Utf8(
				CreativeCommons.WORK_TYPE));
		if (bworkType != null) {
			String workType = new String(bworkType.array());
			addFeature(doc, workType);
		}

		return doc;
	}
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

}
Solution content
/** Adds basic searchable fields to a document. */
public class CCIndexingFilter implements IndexingFilter {
	public static final Log LOG = LogFactory.getLog(CCIndexingFilter.class);

	/** The name of the document field we use. */
	public static String FIELD = "cc";

	private Configuration conf;

	private static final Collection FIELDS = new HashSet();

	static {
		FIELDS.add(WebPage.Field.BASE_URL);
		FIELDS.add(WebPage.Field.METADATA);
	}

	/**
	 * Add the features represented by a license URL. Urls are of the form
	 * "http://creativecommons.org/licenses/xx-xx/xx/xx", where "xx" names a
	 * license feature.
	 */
	public void addUrlFeatures(NutchDocument doc, String urlString) {
		try {
			URL url = new URL(urlString);

			// tokenize the path of the url, breaking at slashes and dashes
			StringTokenizer names = new StringTokenizer(url.getPath(), "/-");

			if (names.hasMoreTokens())
				names.nextToken(); // throw away "licenses"

			// add a feature per component after "licenses"
			while (names.hasMoreTokens()) {
				String feature = names.nextToken();
				addFeature(doc, feature);
			}
		} catch (MalformedURLException e) {
			if (LOG.isWarnEnabled()) {
				LOG.warn("CC: failed to parse url: " + urlString + " : " + e);
			}
		}
	}

	private void addFeature(NutchDocument doc, String feature) {
		doc.add(FIELD, feature);
	}

	public void setConf(Configuration conf) {
		this.conf = conf;
	}

	public Configuration getConf() {
		return this.conf;
	}

	@Override
	public Collection getFields() {
		return FIELDS;
	}

	@Override
	public NutchDocument filter(NutchDocument doc, String url, WebPage page)
			throws IndexingException {

		ByteBuffer blicense = page.getFromMetadata(new Utf8(
				CreativeCommons.LICENSE_URL));
		if (blicense != null) {
			String licenseUrl = new String(blicense.array());
			if (LOG.isInfoEnabled()) {
				LOG.info("CC: indexing " + licenseUrl + " for: "
						+ url.toString());
			}

			// add the entire license as cc:license=xxx
			addFeature(doc, "license=" + licenseUrl);

			// index license attributes extracted of the license url
			addUrlFeatures(doc, licenseUrl);
		}

		// index the license location as cc:meta=xxx
		ByteBuffer blicenseloc = page.getFromMetadata(new Utf8(
				CreativeCommons.LICENSE_LOCATION));
		if (blicenseloc != null) {
			String licenseLocation = new String(blicenseloc.array());
			addFeature(doc, "meta=" + licenseLocation);
		}

		// index the work type cc:type=xxx
		ByteBuffer bworkType = page.getFromMetadata(new Utf8(
				CreativeCommons.WORK_TYPE));
		if (bworkType != null) {
			String workType = new String(bworkType.array());
			addFeature(doc, workType);
		}

		return doc;
	}

}
File
CCIndexingFilter.java
Developer's decision
Version 2
Kind of conflict
Annotation
Attribute
Comment
Method declaration
Method invocation
Static initializer
Chunk
Conflicting content
    // check later
	private static final File testDir = new File(
			System.getProperty("test.input"));

<<<<<<< HEAD
  public void testPages() throws Exception {
    pageTest(new File(testDir, "anchor.html"), "http://foo.com/",
             "http://creativecommons.org/licenses/by-nc-sa/1.0", "a", null);
    // Tika returns  whereas parse-html returns 
    // check later
    pageTest(new File(testDir, "rel.html"), "http://foo.com/",
             "http://creativecommons.org/licenses/by-nc/2.0", "rel", null);
    // Tika returns  whereas parse-html returns 
    pageTest(new File(testDir, "rdf.html"), "http://foo.com/",
             "http://creativecommons.org/licenses/by-nc/1.0", "rdf", "text");
  }
=======
	public void testPages() throws Exception {
		pageTest(new File(testDir, "anchor.html"), "http://foo.com/",
				"http://creativecommons.org/licenses/by-nc-sa/1.0", "a", null);
		// Tika returns  whereas parse-html returns 
		// check later
		pageTest(new File(testDir, "rel.html"), "http://foo.com/",
				"http://creativecommons.org/licenses/by-nc/2.0", "rel", null);
		// Tika returns  whereas parse-html returns 
		// check later
		pageTest(new File(testDir, "rdf.html"), "http://foo.com/",
				"http://creativecommons.org/licenses/by-nc/1.0", "rdf", "text");
	}
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

	public void pageTest(File file, String url, String license,
			String location, String type) throws Exception {
Solution content
	private static final File testDir = new File(
			System.getProperty("test.input"));

	public void testPages() throws Exception {
		pageTest(new File(testDir, "anchor.html"), "http://foo.com/",
				"http://creativecommons.org/licenses/by-nc-sa/1.0", "a", null);
		// Tika returns  whereas parse-html returns 
		// check later
		pageTest(new File(testDir, "rel.html"), "http://foo.com/",
				"http://creativecommons.org/licenses/by-nc/2.0", "rel", null);
		// Tika returns  whereas parse-html returns 
		// check later
		pageTest(new File(testDir, "rdf.html"), "http://foo.com/",
				"http://creativecommons.org/licenses/by-nc/1.0", "rdf", "text");
	}

	public void pageTest(File file, String url, String license,
			String location, String type) throws Exception {
File
TestCCParseFilter.java
Developer's decision
Version 1
Kind of conflict
Method declaration
Chunk
Conflicting content
		byte[] bytes = out.toByteArray();
		Configuration conf = NutchConfiguration.create();

<<<<<<< HEAD
    Content content =
      new Content(url, url, bytes, contentType, new Metadata(), conf);
    Parse parse =  new ParseUtil(conf).parse(content).get(content.getUrl());
    
    Metadata metadata = parse.getData().getParseMeta();
    assertEquals(license, metadata.get("License-Url"));
    assertEquals(location, metadata.get("License-Location"));
    assertEquals(type, metadata.get("Work-Type"));
  }
}
=======
		WebPage page = new WebPage();
		page.setBaseUrl(new Utf8(url));
		page.setContent(ByteBuffer.wrap(bytes));
		MimeUtil mimeutil = new MimeUtil(conf);
		MimeType mtype = mimeutil.getMimeType(file);
		page.setContentType(new Utf8(mtype.getName()));

		new ParseUtil(conf).parse(url, page);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

		ByteBuffer bb = page.getFromMetadata(new Utf8("License-Url"));
		assertEquals(license, new String(bb.array()));
Solution content
		byte[] bytes = out.toByteArray();
		Configuration conf = NutchConfiguration.create();

		WebPage page = new WebPage();
		page.setBaseUrl(new Utf8(url));
		page.setContent(ByteBuffer.wrap(bytes));
		MimeUtil mimeutil = new MimeUtil(conf);
		MimeType mtype = mimeutil.getMimeType(file);
		page.setContentType(new Utf8(mtype.getName()));

		new ParseUtil(conf).parse(url, page);

		ByteBuffer bb = page.getFromMetadata(new Utf8("License-Url"));
		assertEquals(license, new String(bb.array()));
File
TestCCParseFilter.java
Developer's decision
Version 2
Kind of conflict
Method invocation
Variable
Chunk
Conflicting content
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
<<<<<<< HEAD
import org.apache.nutch.parse.Parse;
=======
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.TableUtil;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

/**
 * Indexing filter that indexes all inbound anchor text for a document.
Solution content
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.TableUtil;

/**
 * Indexing filter that indexes all inbound anchor text for a document.
File
AnchorIndexingFilter.java
Developer's decision
Version 2
Kind of conflict
Import
Chunk
Conflicting content
    return doc;
  }

<<<<<<< HEAD
=======
  @Override
  public Collection getFields() {
    return FIELDS;
  }

>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
}
Solution content
    return doc;
  }

  @Override
  public Collection getFields() {
    return FIELDS;
  }

}
File
AnchorIndexingFilter.java
Developer's decision
Version 2
Kind of conflict
Annotation
Method declaration
Chunk
Conflicting content
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
<<<<<<< HEAD
import org.apache.hadoop.io.Text;

import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;

import java.net.MalformedURLException;
import java.net.URL;
import org.apache.hadoop.conf.Configuration;
=======
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.Bytes;
import org.apache.nutch.util.TableUtil;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

/** Adds basic searchable fields to a document. */
public class BasicIndexingFilter implements IndexingFilter {
Solution content
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.Bytes;
import org.apache.nutch.util.TableUtil;

/** Adds basic searchable fields to a document. */
public class BasicIndexingFilter implements IndexingFilter {
File
BasicIndexingFilter.java
Developer's decision
Version 2
Kind of conflict
Import
Chunk
Conflicting content
    return doc;
  }

<<<<<<< HEAD
=======
  public void addIndexBackendOptions(Configuration conf) {
  }

>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  public void setConf(Configuration conf) {
    this.conf = conf;
    this.MAX_TITLE_LENGTH = conf.getInt("indexer.max.title.length", 100);
Solution content
    return doc;
  }

  public void addIndexBackendOptions(Configuration conf) {
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
    this.MAX_TITLE_LENGTH = conf.getInt("indexer.max.title.length", 100);
File
BasicIndexingFilter.java
Developer's decision
Version 2
Kind of conflict
Method declaration
Chunk
Conflicting content
 */
package org.apache.nutch.indexer.more;

<<<<<<< HEAD

import org.apache.oro.text.regex.Perl5Compiler;
import org.apache.oro.text.regex.Perl5Matcher;
import org.apache.oro.text.regex.Perl5Pattern;
import org.apache.oro.text.regex.PatternMatcher;
import org.apache.oro.text.regex.MatchResult;
import org.apache.oro.text.regex.MalformedPatternException;
import org.apache.tika.mime.MimeType;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.nutch.metadata.Metadata;

import org.apache.nutch.net.protocols.HttpDateFormat;
import org.apache.nutch.net.protocols.Response;

import org.apache.nutch.parse.Parse;

import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.NutchDocument;

import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.util.MimeUtil;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;

=======
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collection;
Solution content
 */
package org.apache.nutch.indexer.more;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collection;
File
MoreIndexingFilter.java
Developer's decision
Version 2
Kind of conflict
Import
Chunk
Conflicting content
    try {
      time = HttpDateFormat.toLong(date);
    } catch (ParseException e) {
<<<<<<< HEAD
	// try to parse it as date in alternative format
	try {
	    Date parsedDate = DateUtils.parseDate(date,
		  new String [] {
		      "EEE MMM dd HH:mm:ss yyyy",
		      "EEE MMM dd HH:mm:ss yyyy zzz",
		      "EEE MMM dd HH:mm:ss zzz yyyy",
		      "EEE, MMM dd HH:mm:ss yyyy zzz",
		      "EEE, dd MMM yyyy HH:mm:ss zzz",
		      "EEE,dd MMM yyyy HH:mm:ss zzz",
		      "EEE, dd MMM yyyy HH:mm:sszzz",
		      "EEE, dd MMM yyyy HH:mm:ss",
		      "EEE, dd-MMM-yy HH:mm:ss zzz",
		      "yyyy/MM/dd HH:mm:ss.SSS zzz",
		      "yyyy/MM/dd HH:mm:ss.SSS",
		      "yyyy/MM/dd HH:mm:ss zzz",
		      "yyyy/MM/dd",
		      "yyyy.MM.dd HH:mm:ss",
		      "yyyy-MM-dd HH:mm",
		      "MMM dd yyyy HH:mm:ss. zzz",
		      "MMM dd yyyy HH:mm:ss zzz",
		      "dd.MM.yyyy HH:mm:ss zzz",
		      "dd MM yyyy HH:mm:ss zzz",
		      "dd.MM.yyyy; HH:mm:ss",
		      "dd.MM.yyyy HH:mm:ss",
		      "dd.MM.yyyy zzz"
		  });
	    time = parsedDate.getTime();
            // if (LOG.isWarnEnabled()) {
	    //   LOG.warn(url + ": parsed date: " + date +" to:"+time);
            // }
	} catch (Exception e2) {
            if (LOG.isWarnEnabled()) {
	      LOG.warn(url + ": can't parse erroneous date: " + date);
            }
	}
=======
      // try to parse it as date in alternative format
      try {
        Date parsedDate = DateUtils.parseDate(date, new String[] {
            "EEE MMM dd HH:mm:ss yyyy", "EEE MMM dd HH:mm:ss yyyy zzz",
            "EEE MMM dd HH:mm:ss zzz yyyy", "EEE, dd MMM yyyy HH:mm:ss zzz",
            "EEE,dd MMM yyyy HH:mm:ss zzz", "EEE, dd MMM yyyy HH:mm:sszzz",
            "EEE, dd MMM yyyy HH:mm:ss", "EEE, dd-MMM-yy HH:mm:ss zzz",
            "yyyy/MM/dd HH:mm:ss.SSS zzz", "yyyy/MM/dd HH:mm:ss.SSS",
            "yyyy/MM/dd HH:mm:ss zzz", "yyyy/MM/dd", "yyyy.MM.dd HH:mm:ss",
            "yyyy-MM-dd HH:mm", "MMM dd yyyy HH:mm:ss. zzz",
            "MMM dd yyyy HH:mm:ss zzz", "dd.MM.yyyy HH:mm:ss zzz",
            "dd MM yyyy HH:mm:ss zzz", "dd.MM.yyyy; HH:mm:ss",
            "dd.MM.yyyy HH:mm:ss", "dd.MM.yyyy zzz" });
        time = parsedDate.getTime();
        // if (LOG.isWarnEnabled()) {
        // LOG.warn(url + ": parsed date: " + date +" to:"+time);
        // }
      } catch (Exception e2) {
        if (LOG.isWarnEnabled()) {
          LOG.warn(url + ": can't parse erroneous date: " + date);
        }
      }
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
    }
    return time;
  }
Solution content
    try {
      time = HttpDateFormat.toLong(date);
    } catch (ParseException e) {
      // try to parse it as date in alternative format
      try {
        Date parsedDate = DateUtils.parseDate(date, new String[] {
            "EEE MMM dd HH:mm:ss yyyy", "EEE MMM dd HH:mm:ss yyyy zzz",
            "EEE MMM dd HH:mm:ss zzz yyyy", "EEE, dd MMM yyyy HH:mm:ss zzz",
            "EEE,dd MMM yyyy HH:mm:ss zzz", "EEE, dd MMM yyyy HH:mm:sszzz",
            "EEE, dd MMM yyyy HH:mm:ss", "EEE, dd-MMM-yy HH:mm:ss zzz",
            "yyyy/MM/dd HH:mm:ss.SSS zzz", "yyyy/MM/dd HH:mm:ss.SSS",
            "yyyy/MM/dd HH:mm:ss zzz", "yyyy/MM/dd", "yyyy.MM.dd HH:mm:ss",
            "yyyy-MM-dd HH:mm", "MMM dd yyyy HH:mm:ss. zzz",
            "MMM dd yyyy HH:mm:ss zzz", "dd.MM.yyyy HH:mm:ss zzz",
            "dd MM yyyy HH:mm:ss zzz", "dd.MM.yyyy; HH:mm:ss",
            "dd.MM.yyyy HH:mm:ss", "dd.MM.yyyy zzz" });
        time = parsedDate.getTime();
        // if (LOG.isWarnEnabled()) {
        // LOG.warn(url + ": parsed date: " + date +" to:"+time);
        // }
      } catch (Exception e2) {
        if (LOG.isWarnEnabled()) {
          LOG.warn(url + ": can't parse erroneous date: " + date);
        }
      }
    }
    return time;
  }
File
MoreIndexingFilter.java
Developer's decision
Version 2
Kind of conflict
Comment
Try statement
Chunk
Conflicting content
    return doc;
  }

<<<<<<< HEAD
=======
  public void addIndexBackendOptions(Configuration conf) {
  }

>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  public void setConf(Configuration conf) {
    this.conf = conf;
    MIME = new MimeUtil(conf);
Solution content
    return doc;
  }

  public void addIndexBackendOptions(Configuration conf) {
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
    MIME = new MimeUtil(conf);
File
MoreIndexingFilter.java
Developer's decision
Version 2
Kind of conflict
Method declaration
Chunk
Conflicting content
   * (http://www.w3.org/TR/REC-html40/struct/global.html#h-7.4.4.2) 
* Only the first occurence of language is stored. */ <<<<<<< HEAD public ParseResult filter(Content content, ParseResult parseResult, HTMLMetaTags metaTags, DocumentFragment doc) { Parse parse = parseResult.get(content.getUrl()); String lang = getLanguageFromMetadata(parse.getData().getParseMeta()); if (lang != null) { parse.getData().getParseMeta().set(Metadata.LANGUAGE, lang); return parseResult; } // Trying to find the document's language LanguageParser parser = new LanguageParser(doc); lang = parser.getLanguage(); ======= public Parse filter(String url, WebPage page, Parse parse, HTMLMetaTags metaTags, DocumentFragment doc) { String lang = null; ByteBuffer blang = getLanguageFromMetadata(page.getMetadata()); if (blang == null) { // Trying to find the document's language LanguageParser parser = new LanguageParser(doc); lang = parser.getLanguage(); } else lang = Bytes.toString(blang.array()); >>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f if (lang != null) { // parse..getParseMeta().set(Metadata.LANGUAGE, lang);
Solution content
   * (http://www.w3.org/TR/REC-html40/struct/global.html#h-7.4.4.2) 
* Only the first occurence of language is stored. */ public Parse filter(String url, WebPage page, Parse parse, HTMLMetaTags metaTags, DocumentFragment doc) { String lang = null; ByteBuffer blang = getLanguageFromMetadata(page.getMetadata()); if (blang == null) { // Trying to find the document's language LanguageParser parser = new LanguageParser(doc); lang = parser.getLanguage(); } else lang = Bytes.toString(blang.array()); if (lang != null) { // parse..getParseMeta().set(Metadata.LANGUAGE, lang);
File
HTMLLanguageParser.java
Developer's decision
Version 2
Kind of conflict
Comment
If statement
Method invocation
Method signature
Variable
Chunk
Conflicting content
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
<<<<<<< HEAD
import org.apache.hadoop.io.Text;
import org.apache.nutch.parse.Parse;
=======
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.storage.WebPage;
Solution content
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.storage.WebPage;
File
LanguageIndexingFilter.java
Developer's decision
Version 2
Kind of conflict
Import
Chunk
Conflicting content
    return doc;
  }

<<<<<<< HEAD
=======
  public Collection getFields() {
    return FIELDS;
  }

  public void addIndexBackendOptions(Configuration conf) {
  }

>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  public void setConf(Configuration conf) {
    this.conf = conf;
    this.languageIdentifier = new LanguageIdentifier(conf);
Solution content
    return doc;
  }

  public Collection getFields() {
    return FIELDS;
  }

  public void addIndexBackendOptions(Configuration conf) {
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
    this.languageIdentifier = new LanguageIdentifier(conf);
File
LanguageIndexingFilter.java
Developer's decision
Version 2
Kind of conflict
Method declaration
Chunk
Conflicting content
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
<<<<<<< HEAD

// Nutch imports
=======
import org.apache.lucene.analysis.Token;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
import org.apache.nutch.util.LogUtil;

/**
Solution content
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Token;
import org.apache.nutch.util.LogUtil;

/**
File
NGramProfile.java
Developer's decision
Version 2
Kind of conflict
Comment
Import
Chunk
Conflicting content
   * @param t
   *          is the Token to be added
   */
<<<<<<< HEAD
  public void add(String token) {
    add(new StringBuffer().append(SEPARATOR)
                          .append(token)
                          .append(SEPARATOR));
=======
  public void add(Token t) {
    add(new StringBuffer().append(SEPARATOR).append(t.term()).append(SEPARATOR));
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  }

  /**
Solution content
   * @param t
   *          is the Token to be added
   */
  public void add(Token t) {
    add(new StringBuffer().append(SEPARATOR).append(t.term()).append(SEPARATOR));
  }

  /**
File
NGramProfile.java
Developer's decision
Version 2
Kind of conflict
Method invocation
Method signature
Chunk
Conflicting content
      "document 2 title</head><body>this is english</body></html>",
      "<html><head><meta name=\"dc.language\" content=\"en\"><title>document 3 title</head><body>this is english</body></html>" };

<<<<<<< HEAD
  // Tika does not return "fi" but null
=======
  // known issue with attributed not being passed by Tika
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  String metalanguages[] = { "fi", "en", "en" };

  /**</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>      "<html><head><meta http-equiv=\"content-language\" content=\"en\"><title>document 2 title</head><body>this is english</body></html>",
      "<html><head><meta name=\"dc.language\" content=\"en\"><title>document 3 title</head><body>this is english</body></html>" };

  // known issue with attributed not being passed by Tika
  String metalanguages[] = { "fi", "en", "en" };

  /**</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestHTMLLanguageParser.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Comment</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>import junit.framework.TestSuite;
import junit.textui.TestRunner;

<<<<<<< HEAD
=======
import org.apache.lucene.analysis.Token;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
import org.apache.nutch.analysis.lang.NGramProfile.NGramEntry;
import org.apache.nutch.util.NutchConfiguration;
</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>import junit.framework.TestSuite;
import junit.textui.TestRunner;

import org.apache.lucene.analysis.Token;
import org.apache.nutch.analysis.lang.NGramProfile.NGramEntry;
import org.apache.nutch.util.NutchConfiguration;
</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestLanguageIdentifier.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Import</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre> */
public class TestLanguageIdentifier extends TestCase {

<<<<<<< HEAD
	public TestLanguageIdentifier(String testName) {
		super(testName);
	}

	public static Test suite() {
		return new TestSuite(TestLanguageIdentifier.class);
	}

	public static void main(String[] args) {
		TestRunner.run(suite());
	}

	String tokencontent1 = "testaddtoken";
	String tokencontent2 = "anotherteststring";

	int[] counts1 = { 3, 2, 2, 1, 1, 1, 1, 1 };

	String[] chars1 = { "t", "d", "e", "a", "k", "n", "o", "s" };

	/**
	 * Test addFromToken method
	 * 
	 */
	public void testAddToken() {

		NGramProfile p = new NGramProfile("test", 1, 1);

		p.add(tokencontent1);
		p.normalize();

		testCounts(p.getSorted(), counts1);
		testContents(p.getSorted(), chars1);
	}

	/**
	 * Test analyze method
	 */
	public void testAnalyze() {
		String tokencontent = "testmeagain";

		NGramProfile p = new NGramProfile("test", 1, 1);
		p.analyze(new StringBuilder(tokencontent));

		// test that profile size is ok, eg 9 different NGramEntries "tesmagin"
		assertEquals(8, p.getSorted().size());
	}

	/**
	 * Test addNGrams method with StringBuffer argument
	 * 
	 */
	public void testAddNGramsStringBuffer() {
		String tokencontent = "testmeagain";
=======
  public TestLanguageIdentifier(String testName) {
    super(testName);
  }

  public static Test suite() {
    return new TestSuite(TestLanguageIdentifier.class);
  }

  public static void main(String[] args) {
    TestRunner.run(suite());
  }
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

		NGramProfile p = new NGramProfile("test", 1, 1);
		p.add(new StringBuffer(tokencontent));</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre> */
public class TestLanguageIdentifier extends TestCase {

  public TestLanguageIdentifier(String testName) {
    super(testName);
  }

  public static Test suite() {
    return new TestSuite(TestLanguageIdentifier.class);
  }

  public static void main(String[] args) {
    TestRunner.run(suite());
  }
</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestLanguageIdentifier.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Array initializer</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Attribute</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Comment</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method declaration</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method signature</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>

	}

<<<<<<< HEAD
	/**
	 * test getSorted method
	 */
	public void testGetSorted() {
		int[] count = { 4, 3, 1 };
		String[] ngram = { "a", "b", "c" };
		String teststring = "AAaaBbbC";

		NGramProfile p = new NGramProfile("test", 1, 1);
		p.analyze(new StringBuilder(teststring));

		// test size of profile
		assertEquals(3, p.getSorted().size());

		testCounts(p.getSorted(), count);
		testContents(p.getSorted(), ngram);

	}

	public void testGetSimilarity() {
		NGramProfile a = new NGramProfile("a", 1, 1);
		NGramProfile b = new NGramProfile("b", 1, 1);

		a.analyze(new StringBuilder(tokencontent1));
		b.analyze(new StringBuilder(tokencontent2));

		// because of rounding errors might slightly return different results
		assertEquals(a.getSimilarity(b), b.getSimilarity(a), 0.0000002);

	}

	public void testExactMatch() {
		NGramProfile a = new NGramProfile("a", 1, 1);

		a.analyze(new StringBuilder(tokencontent1));

		assertEquals(a.getSimilarity(a), 0, 0);

	}

	public void testIO() {
		// Create profile and set some contents
		NGramProfile a = new NGramProfile("a", 1, 1);
		a.analyze(new StringBuilder(this.tokencontent1));

		NGramProfile b = new NGramProfile("a_from_inputstream", 1, 1);

		// save profile
		ByteArrayOutputStream os = new ByteArrayOutputStream();

		try {
			a.save(os);
			os.close();
		} catch (Exception e) {
			fail();
		}

		// load profile
		InputStream is = new ByteArrayInputStream(os.toByteArray());
		try {
			b.load(is);
			is.close();
		} catch (Exception e) {
			fail();
		}

		// check it
		testCounts(b.getSorted(), counts1);
		testContents(b.getSorted(), chars1);
	}

	private void testContents(List<NGramEntry> entries, String contents[]) {
		int c = 0;

		for (NGramEntry nge : entries) {
			assertEquals(contents[c], nge.getSeq().toString());
			c++;
		}
	}

	private void testCounts(List<NGramEntry> entries, int counts[]) {
		int c = 0;

		for (NGramEntry nge : entries) {
			System.out.println(nge);
			assertEquals(counts[c], nge.getCount());
			c++;
		}
	}

	public void testIdentify() {
		try {
			long total = 0;
			LanguageIdentifier idfr = new LanguageIdentifier(
					NutchConfiguration.create());
			BufferedReader in = new BufferedReader(new InputStreamReader(this
					.getClass().getResourceAsStream("test-referencial.txt")));
			String line = null;
			while ((line = in.readLine()) != null) {
				String[] tokens = line.split(";");
				if (!tokens[0].equals("")) {
					long start = System.currentTimeMillis();
					// Identify the whole file
					String lang = idfr.identify(this.getClass()
							.getResourceAsStream(tokens[0]), "UTF-8");
					total += System.currentTimeMillis() - start;
					assertEquals(tokens[1], lang);
					// Then, each line of the file...
					BufferedReader testFile = new BufferedReader(
							new InputStreamReader(this.getClass()
									.getResourceAsStream(tokens[0]), "UTF-8"));
					String testLine = null;
					while ((testLine = testFile.readLine()) != null) {
						testLine = testLine.trim();
						if (testLine.length() > 256) {
							lang = idfr.identify(testLine);
							assertEquals(tokens[1], lang);
						}
					}
					testFile.close();
				}
			}
			in.close();
			System.out.println("Total Time=" + total);
		} catch (Exception e) {
			e.printStackTrace();
			fail(e.toString());
		}
	}
=======
  /**
   * Test addFromToken method
   * 
   */
  public void testAddToken() {

    NGramProfile p = new NGramProfile("test", 1, 1);

    Token t = new Token(tokencontent1, 0, tokencontent1.length());
    p.add(t);
    p.normalize();

    testCounts(p.getSorted(), counts1);
    testContents(p.getSorted(), chars1);
  }

  /**
   * Test analyze method
   */
  public void testAnalyze() {
    String tokencontent = "testmeagain";

    NGramProfile p = new NGramProfile("test", 1, 1);
    p.analyze(new StringBuilder(tokencontent));

    // test that profile size is ok, eg 9 different NGramEntries "tesmagin"
    assertEquals(8, p.getSorted().size());
  }

  /**
   * Test addNGrams method with StringBuffer argument
   * 
   */
  public void testAddNGramsStringBuffer() {
    String tokencontent = "testmeagain";

    NGramProfile p = new NGramProfile("test", 1, 1);
    p.add(new StringBuffer(tokencontent));

    // test that profile size is ok, eg 8 different NGramEntries "tesmagin"
    assertEquals(8, p.getSorted().size());

  }

  /**
   * test getSorted method
   */
  public void testGetSorted() {
    int[] count = { 4, 3, 1 };
    String[] ngram = { "a", "b", "c" };

    String teststring = "AAaaBbbC";

    NGramProfile p = new NGramProfile("test", 1, 1);
    p.analyze(new StringBuilder(teststring));

    // test size of profile
    assertEquals(3, p.getSorted().size());

    testCounts(p.getSorted(), count);
    testContents(p.getSorted(), ngram);

  }

  public void testGetSimilarity() {
    NGramProfile a = new NGramProfile("a", 1, 1);
    NGramProfile b = new NGramProfile("b", 1, 1);

    a.analyze(new StringBuilder(tokencontent1));
    b.analyze(new StringBuilder(tokencontent2));

    // because of rounding errors might slightly return different results
    assertEquals(a.getSimilarity(b), b.getSimilarity(a), 0.0000002);

  }

  public void testExactMatch() {
    NGramProfile a = new NGramProfile("a", 1, 1);

    a.analyze(new StringBuilder(tokencontent1));

    assertEquals(a.getSimilarity(a), 0, 0);

  }

  public void testIO() {
    // Create profile and set some contents
    NGramProfile a = new NGramProfile("a", 1, 1);
    a.analyze(new StringBuilder(this.tokencontent1));

    NGramProfile b = new NGramProfile("a_from_inputstream", 1, 1);

    // save profile
    ByteArrayOutputStream os = new ByteArrayOutputStream();

    try {
      a.save(os);
      os.close();
    } catch (Exception e) {
      fail();
    }

    // load profile
    InputStream is = new ByteArrayInputStream(os.toByteArray());
    try {
      b.load(is);
      is.close();
    } catch (Exception e) {
      fail();
    }

    // check it
    testCounts(b.getSorted(), counts1);
    testContents(b.getSorted(), chars1);
  }

  private void testContents(List<NGramEntry> entries, String contents[]) {
    int c = 0;

    for (NGramEntry nge : entries) {
      assertEquals(contents[c], nge.getSeq().toString());
      c++;
    }
  }

  private void testCounts(List<NGramEntry> entries, int counts[]) {
    int c = 0;

    for (NGramEntry nge : entries) {
      System.out.println(nge);
      assertEquals(counts[c], nge.getCount());
      c++;
    }
  }

  public void testIdentify() {
    try {
      long total = 0;
      LanguageIdentifier idfr = new LanguageIdentifier(NutchConfiguration
          .create());
      BufferedReader in = new BufferedReader(new InputStreamReader(this
          .getClass().getResourceAsStream("test-referencial.txt")));
      String line = null;
      while ((line = in.readLine()) != null) {
        String[] tokens = line.split(";");
        if (!tokens[0].equals("")) {
          long start = System.currentTimeMillis();
          // Identify the whole file
          String lang = idfr.identify(this.getClass().getResourceAsStream(
              tokens[0]), "UTF-8");
          total += System.currentTimeMillis() - start;
          assertEquals(tokens[1], lang);
          // Then, each line of the file...
          BufferedReader testFile = new BufferedReader(new InputStreamReader(
              this.getClass().getResourceAsStream(tokens[0]), "UTF-8"));
          String testLine = null;
          while ((testLine = testFile.readLine()) != null) {
            testLine = testLine.trim();
            if (testLine.length() > 256) {
              lang = idfr.identify(testLine);
              assertEquals(tokens[1], lang);
            }
          }
          testFile.close();
        }
      }
      in.close();
      System.out.println("Total Time=" + total);
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.toString());
    }
  }
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

}</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>
  String[] chars1 = { "t", "d", "e", "a", "k", "n", "o", "s" };

  /**
   * Test addFromToken method
   * 
   */
  public void testAddToken() {

    NGramProfile p = new NGramProfile("test", 1, 1);

    Token t = new Token(tokencontent1, 0, tokencontent1.length());
    p.add(t);
    p.normalize();

    testCounts(p.getSorted(), counts1);
    testContents(p.getSorted(), chars1);
  }

  /**
   * Test analyze method
   */
  public void testAnalyze() {
    String tokencontent = "testmeagain";

    NGramProfile p = new NGramProfile("test", 1, 1);
    p.analyze(new StringBuilder(tokencontent));

    // test that profile size is ok, eg 9 different NGramEntries "tesmagin"
    assertEquals(8, p.getSorted().size());
  }

  /**
   * Test addNGrams method with StringBuffer argument
   * 
   */
  public void testAddNGramsStringBuffer() {
    String tokencontent = "testmeagain";

    NGramProfile p = new NGramProfile("test", 1, 1);
    p.add(new StringBuffer(tokencontent));

    // test that profile size is ok, eg 8 different NGramEntries "tesmagin"
    assertEquals(8, p.getSorted().size());

  }

  /**
   * test getSorted method
   */
  public void testGetSorted() {
    int[] count = { 4, 3, 1 };
    String[] ngram = { "a", "b", "c" };

    String teststring = "AAaaBbbC";

    NGramProfile p = new NGramProfile("test", 1, 1);
    p.analyze(new StringBuilder(teststring));

    // test size of profile
    assertEquals(3, p.getSorted().size());

    testCounts(p.getSorted(), count);
    testContents(p.getSorted(), ngram);

  }

  public void testGetSimilarity() {
    NGramProfile a = new NGramProfile("a", 1, 1);
    NGramProfile b = new NGramProfile("b", 1, 1);

    a.analyze(new StringBuilder(tokencontent1));
    b.analyze(new StringBuilder(tokencontent2));

    // because of rounding errors might slightly return different results
    assertEquals(a.getSimilarity(b), b.getSimilarity(a), 0.0000002);

  }

  public void testExactMatch() {
    NGramProfile a = new NGramProfile("a", 1, 1);

    a.analyze(new StringBuilder(tokencontent1));

    assertEquals(a.getSimilarity(a), 0, 0);

  }

  public void testIO() {
    // Create profile and set some contents
    NGramProfile a = new NGramProfile("a", 1, 1);
    a.analyze(new StringBuilder(this.tokencontent1));

    NGramProfile b = new NGramProfile("a_from_inputstream", 1, 1);

    // save profile
    ByteArrayOutputStream os = new ByteArrayOutputStream();

    try {
      a.save(os);
      os.close();
    } catch (Exception e) {
      fail();
    }

    // load profile
    InputStream is = new ByteArrayInputStream(os.toByteArray());
    try {
      b.load(is);
      is.close();
    } catch (Exception e) {
      fail();
    }

    // check it
    testCounts(b.getSorted(), counts1);
    testContents(b.getSorted(), chars1);
  }

  private void testContents(List<NGramEntry> entries, String contents[]) {
    int c = 0;

    for (NGramEntry nge : entries) {
      assertEquals(contents[c], nge.getSeq().toString());
      c++;
    }
  }

  private void testCounts(List<NGramEntry> entries, int counts[]) {
    int c = 0;

    for (NGramEntry nge : entries) {
      System.out.println(nge);
      assertEquals(counts[c], nge.getCount());
      c++;
    }
  }

  public void testIdentify() {
    try {
      long total = 0;
      LanguageIdentifier idfr = new LanguageIdentifier(NutchConfiguration
          .create());
      BufferedReader in = new BufferedReader(new InputStreamReader(this
          .getClass().getResourceAsStream("test-referencial.txt")));
      String line = null;
      while ((line = in.readLine()) != null) {
        String[] tokens = line.split(";");
        if (!tokens[0].equals("")) {
          long start = System.currentTimeMillis();
          // Identify the whole file
          String lang = idfr.identify(this.getClass().getResourceAsStream(
              tokens[0]), "UTF-8");
          total += System.currentTimeMillis() - start;
          assertEquals(tokens[1], lang);
          // Then, each line of the file...
          BufferedReader testFile = new BufferedReader(new InputStreamReader(
              this.getClass().getResourceAsStream(tokens[0]), "UTF-8"));
          String testLine = null;
          while ((testLine = testFile.readLine()) != null) {
            testLine = testLine.trim();
            if (testLine.length() > 256) {
              lang = idfr.identify(testLine);
              assertEquals(tokens[1], lang);
            }
          }
          testFile.close();
        }
      }
      in.close();
      System.out.println("Total Time=" + total);
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.toString());
    }
  }

}</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestLanguageIdentifier.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Comment</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method declaration</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>    }
    robots = new RobotRulesParser();
  }
<<<<<<< HEAD
  
   // Inherited Javadoc
    public void setConf(Configuration conf) {
        this.conf = conf;
        this.proxyHost = conf.get("http.proxy.host");
        this.proxyPort = conf.getInt("http.proxy.port", 8080);
        this.useProxy = (proxyHost != null && proxyHost.length() > 0);
        this.timeout = conf.getInt("http.timeout", 10000);
        this.maxContent = conf.getInt("http.content.limit", 64 * 1024);
        this.maxDelays = conf.getInt("http.max.delays", 3);
        this.maxThreadsPerHost = conf.getInt("fetcher.threads.per.host", 1);
        this.userAgent = getAgentString(conf.get("http.agent.name"), conf.get("http.agent.version"), conf
                .get("http.agent.description"), conf.get("http.agent.url"), conf.get("http.agent.email"));
        this.acceptLanguage = conf.get("http.accept.language", acceptLanguage);
        this.serverDelay = (long) (conf.getFloat("fetcher.server.delay", 1.0f) * 1000);
        this.maxCrawlDelay = (long)(conf.getInt("fetcher.max.crawl.delay", -1) * 1000);
        // backward-compatible default setting
        this.byIP = conf.getBoolean("fetcher.threads.per.host.by.ip", true);
        this.useHttp11 = conf.getBoolean("http.useHttp11", false);
        this.robots.setConf(conf);
        this.checkBlocking = conf.getBoolean(Protocol.CHECK_BLOCKING, true);
        this.checkRobots = conf.getBoolean(Protocol.CHECK_ROBOTS, true);
        logConf();
    }
=======

  // Inherited Javadoc
  public void setConf(Configuration conf) {
    this.conf = conf;
    this.proxyHost = conf.get("http.proxy.host");
    this.proxyPort = conf.getInt("http.proxy.port", 8080);
    this.useProxy = (proxyHost != null && proxyHost.length() > 0);
    this.timeout = conf.getInt("http.timeout", 10000);
    this.maxContent = conf.getInt("http.content.limit", 64 * 1024);
    this.maxDelays = conf.getInt("http.max.delays", 3);
    this.maxThreadsPerHost = conf.getInt("fetcher.threads.per.host", 1);
    this.userAgent = getAgentString(conf.get("http.agent.name"), conf.get("http.agent.version"), conf
        .get("http.agent.description"), conf.get("http.agent.url"), conf.get("http.agent.email"));
    this.acceptLanguage = conf.get("http.accept.language", acceptLanguage);
    this.serverDelay = (long) (conf.getFloat("fetcher.server.delay", 1.0f) * 1000);
    this.maxCrawlDelay = (conf.getInt("fetcher.max.crawl.delay", -1) * 1000);
    // backward-compatible default setting
    this.byIP = conf.getBoolean("fetcher.threads.per.host.by.ip", true);
    this.mimeTypes = new MimeUtil(conf);
    this.useHttp11 = conf.getBoolean("http.useHttp11", false);
    this.robots.setConf(conf);
    this.checkBlocking = conf.getBoolean(Protocol.CHECK_BLOCKING, true);
    this.checkRobots = conf.getBoolean(Protocol.CHECK_ROBOTS, true);
    logConf();
  }
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

  // Inherited Javadoc
  public Configuration getConf() {</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>    }
    robots = new RobotRulesParser();
  }

  // Inherited Javadoc
  public void setConf(Configuration conf) {
    this.conf = conf;
    this.proxyHost = conf.get("http.proxy.host");
    this.proxyPort = conf.getInt("http.proxy.port", 8080);
    this.useProxy = (proxyHost != null && proxyHost.length() > 0);
    this.timeout = conf.getInt("http.timeout", 10000);
    this.maxContent = conf.getInt("http.content.limit", 64 * 1024);
    this.maxDelays = conf.getInt("http.max.delays", 3);
    this.maxThreadsPerHost = conf.getInt("fetcher.threads.per.host", 1);
    this.userAgent = getAgentString(conf.get("http.agent.name"), conf.get("http.agent.version"), conf
        .get("http.agent.description"), conf.get("http.agent.url"), conf.get("http.agent.email"));
    this.acceptLanguage = conf.get("http.accept.language", acceptLanguage);
    this.serverDelay = (long) (conf.getFloat("fetcher.server.delay", 1.0f) * 1000);
    this.maxCrawlDelay = (conf.getInt("fetcher.max.crawl.delay", -1) * 1000);
    // backward-compatible default setting
    this.byIP = conf.getBoolean("fetcher.threads.per.host.by.ip", true);
    this.mimeTypes = new MimeUtil(conf);
    this.useHttp11 = conf.getBoolean("http.useHttp11", false);
    this.robots.setConf(conf);
    this.checkBlocking = conf.getBoolean(Protocol.CHECK_BLOCKING, true);
    this.checkRobots = conf.getBoolean(Protocol.CHECK_ROBOTS, true);
    logConf();
  }

  // Inherited Javadoc
  public Configuration getConf() {</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">HttpBase.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Comment</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method declaration</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>  public ProtocolOutput getProtocolOutput(String url, WebPage page) {

    try {
<<<<<<< HEAD
      URL u = new URL(urlString);
      long delay = serverDelay;
      
=======
      URL u = new URL(url);
      long delay = serverDelay;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
      if (checkRobots) {
        try {
          if (!robots.isAllowed(this, u)) {</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>  public ProtocolOutput getProtocolOutput(String url, WebPage page) {

    try {
      URL u = new URL(url);
      long delay = serverDelay;
      if (checkRobots) {
        try {
          if (!robots.isAllowed(this, u)) {</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">HttpBase.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Attribute</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>      if (LOG.isTraceEnabled()) { LOG.trace("cache miss " + url); }
      try {
        Response response = http.getResponse(new URL(url, "/robots.txt"),
<<<<<<< HEAD
                                             new CrawlDatum(), true);
=======
                                             new WebPage(), true);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
        // try one level of redirection ?
        if (response.getCode() == 301 || response.getCode() == 302) {
          String redirection = response.getHeader("Location");</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>      if (LOG.isTraceEnabled()) { LOG.trace("cache miss " + url); }
      try {
        Response response = http.getResponse(new URL(url, "/robots.txt"),
                                             new WebPage(), true);
        // try one level of redirection ?
        if (response.getCode() == 301 || response.getCode() == 302) {
          String redirection = response.getHeader("Location");</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">RobotRulesParser.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Method invocation</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>            } else {
              redir = new URL(redirection);
            }
<<<<<<< HEAD
            
            response = http.getResponse(redir, new CrawlDatum(), true);
=======

            response = http.getResponse(redir, new WebPage(), true);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
          }
        }
</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>            } else {
              redir = new URL(redirection);
            }

            response = http.getResponse(redir, new WebPage(), true);
          }
        }
</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">RobotRulesParser.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>package org.apache.nutch.microformats.reltag;

// Nutch imports
<<<<<<< HEAD
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.hadoop.io.Text;
import org.apache.nutch.parse.Parse;
=======
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.HashSet;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>package org.apache.nutch.microformats.reltag;

// Nutch imports
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.HashSet;

import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">RelTagIndexingFilter.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Import</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>		return this.conf;
	}

<<<<<<< HEAD
  /* ----------------------------- *
   * &lt;implementation:Configurable&gt; *
   * ----------------------------- */
  
  public void setConf(Configuration conf) {
    this.conf = conf;
  }
=======
	@Override
	public NutchDocument filter(NutchDocument doc, String url, WebPage page)
			throws IndexingException {
		// Check if some Rel-Tags found, possibly put there by RelTagParser
		ByteBuffer bb = page.getFromMetadata(new Utf8(RelTagParser.REL_TAG));
		
		if (bb != null) {
			String[] tags = new String(bb.array()).split("\t");
			for (int i = 0; i < tags.length; i++) {
				doc.add("tag", tags[i]);
			}
		}
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

		return doc;
	}</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>		return this.conf;
	}

	@Override
	public NutchDocument filter(NutchDocument doc, String url, WebPage page)
			throws IndexingException {
		// Check if some Rel-Tags found, possibly put there by RelTagParser
		ByteBuffer bb = page.getFromMetadata(new Utf8(RelTagParser.REL_TAG));
		
		if (bb != null) {
			String[] tags = new String(bb.array()).split("\t");
			for (int i = 0; i < tags.length; i++) {
				doc.add("tag", tags[i]);
			}
		}

		return doc;
	}</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">RelTagIndexingFilter.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Annotation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Comment</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> If statement</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method declaration</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method signature</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>      testDOMs[i]= node;
    }
    try {
<<<<<<< HEAD
     answerOutlinks = new Outlink[][]{ 
         {
           new Outlink("http://www.nutch.org", "anchor"),
         },
         {
           new Outlink("http://www.nutch.org/", "home"),
           new Outlink("http://www.nutch.org/docs/bot.html", "bots"),
         },
         {
           new Outlink("http://www.nutch.org/", "separate this"),
           new Outlink("http://www.nutch.org/docs/ok", "from this"),
         },
         {
           new Outlink("http://www.nutch.org/", "home"),
           new Outlink("http://www.nutch.org/docs/1", "1"),
           new Outlink("http://www.nutch.org/docs/2", "2"),
         },
         {
           new Outlink("http://www.nutch.org/frames/top.html", ""),
           new Outlink("http://www.nutch.org/frames/left.html", ""),
           new Outlink("http://www.nutch.org/frames/invalid.html", ""),
           new Outlink("http://www.nutch.org/frames/right.html", ""),
         },
         {
           new Outlink("http://www.nutch.org/maps/logo.gif", ""),
           new Outlink("http://www.nutch.org/index.html", ""),
           new Outlink("http://www.nutch.org/maps/#bottom", ""),
           new Outlink("http://www.nutch.org/bot.html", ""),
           new Outlink("http://www.nutch.org/docs/index.html", ""),
         },
         {
             new Outlink("http://www.nutch.org/index.html", "whitespace test"),
         },
         {
         },
         {
           new Outlink("http://www.nutch.org/dummy.jsp", "test2"),
         },
         {
         },
         {
           new Outlink("http://www.nutch.org/;x", "anchor1"),
           new Outlink("http://www.nutch.org/g;x", "anchor2"),
           new Outlink("http://www.nutch.org/g;x?y#s", "anchor3")
         },
         {
           new Outlink("http://www.nutch.org/g;something", "anchor1"),
           new Outlink("http://www.nutch.org/g;something?y#s", "anchor2"),
           new Outlink("http://www.nutch.org/;something?y=1", "anchor3"),
           new Outlink("http://www.nutch.org/;something?y=1#s", "anchor4"),
           new Outlink("http://www.nutch.org/?y=1;somethingelse", "anchor5")
         },
         {
           new Outlink("http://www.nutch.org/g", ""),
           new Outlink("http://www.nutch.org/g1", ""),
           new Outlink("http://www.nutch.org/g2", "bla bla"),
           new Outlink("http://www.nutch.org/test.gif", "bla bla"),
         }
      };
   
=======
    answerOutlinks = new Outlink[][]{ 
        {
          new Outlink("http://www.nutch.org", "anchor"),
        },
        {
          new Outlink("http://www.nutch.org/", "home"),
          new Outlink("http://www.nutch.org/docs/bot.html", "bots"),
        },
        {
          new Outlink("http://www.nutch.org/", "separate this"),
          new Outlink("http://www.nutch.org/docs/ok", "from this"),
        },
        {
          new Outlink("http://www.nutch.org/", "home"),
          new Outlink("http://www.nutch.org/docs/1", "1"),
          new Outlink("http://www.nutch.org/docs/2", "2"),
        },
        {
          new Outlink("http://www.nutch.org/frames/top.html", ""),
          new Outlink("http://www.nutch.org/frames/left.html", ""),
          new Outlink("http://www.nutch.org/frames/invalid.html", ""),
          new Outlink("http://www.nutch.org/frames/right.html", ""),
        },
        {
          new Outlink("http://www.nutch.org/maps/logo.gif", ""),
          new Outlink("http://www.nutch.org/index.html", ""),
          new Outlink("http://www.nutch.org/maps/#bottom", ""),
          new Outlink("http://www.nutch.org/bot.html", ""),
          new Outlink("http://www.nutch.org/docs/index.html", ""),
        },
        {
          new Outlink("http://www.nutch.org/index.html", "whitespace test"),
        },
        {
        },
        {
          new Outlink("http://www.nutch.org/dummy.jsp", "test2"),
        },
        {
        },
        {
          new Outlink("http://www.nutch.org/;x", "anchor1"),
          new Outlink("http://www.nutch.org/g;x", "anchor2"),
          new Outlink("http://www.nutch.org/g;x?y#s", "anchor3")
        },
        {
          new Outlink("http://www.nutch.org/g;something", "anchor1"),
          new Outlink("http://www.nutch.org/g;something?y#s", "anchor2"),
          new Outlink("http://www.nutch.org/;something?y=1", "anchor3"),
          new Outlink("http://www.nutch.org/;something?y=1#s", "anchor4"),
          new Outlink("http://www.nutch.org/?y=1;somethingelse", "anchor5")
        }
    };

>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
    } catch (MalformedURLException e) {
        
  }</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>      testDOMs[i]= node;
    }
    try {
    answerOutlinks = new Outlink[][]{ 
        {
          new Outlink("http://www.nutch.org", "anchor"),
        },
        {
          new Outlink("http://www.nutch.org/", "home"),
          new Outlink("http://www.nutch.org/docs/bot.html", "bots"),
        },
        {
          new Outlink("http://www.nutch.org/", "separate this"),
          new Outlink("http://www.nutch.org/docs/ok", "from this"),
        },
        {
          new Outlink("http://www.nutch.org/", "home"),
          new Outlink("http://www.nutch.org/docs/1", "1"),
          new Outlink("http://www.nutch.org/docs/2", "2"),
        },
        {
          new Outlink("http://www.nutch.org/frames/top.html", ""),
          new Outlink("http://www.nutch.org/frames/left.html", ""),
          new Outlink("http://www.nutch.org/frames/invalid.html", ""),
          new Outlink("http://www.nutch.org/frames/right.html", ""),
        },
        {
          new Outlink("http://www.nutch.org/maps/logo.gif", ""),
          new Outlink("http://www.nutch.org/index.html", ""),
          new Outlink("http://www.nutch.org/maps/#bottom", ""),
          new Outlink("http://www.nutch.org/bot.html", ""),
          new Outlink("http://www.nutch.org/docs/index.html", ""),
        },
        {
          new Outlink("http://www.nutch.org/index.html", "whitespace test"),
        },
        {
        },
        {
          new Outlink("http://www.nutch.org/dummy.jsp", "test2"),
        },
        {
        },
        {
          new Outlink("http://www.nutch.org/;x", "anchor1"),
          new Outlink("http://www.nutch.org/g;x", "anchor2"),
          new Outlink("http://www.nutch.org/g;x?y#s", "anchor3")
        },
        {
          new Outlink("http://www.nutch.org/g;something", "anchor1"),
          new Outlink("http://www.nutch.org/g;something?y#s", "anchor2"),
          new Outlink("http://www.nutch.org/;something?y=1", "anchor3"),
          new Outlink("http://www.nutch.org/;something?y=1#s", "anchor4"),
          new Outlink("http://www.nutch.org/?y=1;somethingelse", "anchor5")
        }
    };

    } catch (MalformedURLException e) {
        
  }</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestDOMContentUtils.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Attribute</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method invocation</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>        reqStr.append(userAgent);
        reqStr.append("\r\n");
      }
<<<<<<< HEAD
      
      reqStr.append("Accept-Language: ");
      reqStr.append(this.http.getAcceptLanguage());
      reqStr.append("\r\n");

      if (datum.getModifiedTime() > 0) {
        reqStr.append("If-Modified-Since: " + HttpDateFormat.toString(datum.getModifiedTime()));
        reqStr.append("\r\n");
      }
      reqStr.append("\r\n");
      
=======

      if (page.isReadable(WebPage.Field.MODIFIED_TIME.getIndex())) {
        reqStr.append("If-Modified-Since: " +
                      HttpDateFormat.toString(page.getModifiedTime()));
        reqStr.append("\r\n");
      }
      reqStr.append("\r\n");

>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
      byte[] reqBytes= reqStr.toString().getBytes();

      req.write(reqBytes);</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>        reqStr.append(userAgent);
        reqStr.append("\r\n");
      }

      if (page.isReadable(WebPage.Field.MODIFIED_TIME.getIndex())) {
        reqStr.append("If-Modified-Since: " +
                      HttpDateFormat.toString(page.getModifiedTime()));
        reqStr.append("\r\n");
      }
      reqStr.append("\r\n");

      byte[] reqBytes= reqStr.toString().getBytes();

      req.write(reqBytes);</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">HttpResponse.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">If statement</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method invocation</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>
<<<<<<< HEAD
public class Http extends HttpBase {
 */
  // Logger for this plugin; also handed to the HttpBase constructor.
  public static final Log LOG = LogFactory.getLog(Http.class);

  // Connection manager backing the single static HttpClient below.
  private static MultiThreadedHttpConnectionManager connectionManager =
          new MultiThreadedHttpConnectionManager();

  // Since the Configuration has not yet been set,
  // then an unconfigured client is returned.
  private static HttpClient client = new HttpClient(connectionManager);
  // Default credentials and their realm/scheme; filled in by
  // setCredentials() from the "default" element of the auth file.
  private static String defaultUsername;
  private static String defaultPassword;
  private static String defaultRealm;
  private static String defaultScheme;
  // Values of "http.auth.file" and "http.agent.host", stored by setConf().
  private static String authFile;
  private static String agentHost;
  // Set to true on the first setCredentials() call so the auth file is
  // not read again.
  private static boolean authRulesRead = false;
  // Configuration most recently passed to setConf(); static, so it is
  // shared by all instances.
  private static Configuration conf;

  // Overwritten in setConf() with "fetcher.threads.fetch" (default 10).
  int maxThreadsTotal = 10;

  // Proxy credentials read in setConf() from the "http.proxy.*" keys.
  private String proxyUsername;
  private String proxyPassword;
  private String proxyRealm;

  /**
   * Returns the configured HTTP client.
   *
   * @return HTTP client
   */
  static synchronized HttpClient getClient() {
    // Hands out the single static client held by this class.
    return client;
  }

  /**
   * Constructs this plugin.
   */
  public Http() {
    // Pass this plugin's logger to the HttpBase constructor.
    super(LOG);
  }

  /**
   * Reads the configuration from the Nutch configuration files and sets
   * the configuration.
   *
   * @param conf Configuration
   */
  public void setConf(Configuration conf) {
    super.setConf(conf);

    // "conf" is a static field: the most recent call wins for all instances.
    Http.conf = conf;
    maxThreadsTotal = conf.getInt("fetcher.threads.fetch", 10);
    proxyUsername = conf.get("http.proxy.username", "");
    proxyPassword = conf.get("http.proxy.password", "");
    proxyRealm = conf.get("http.proxy.realm", "");
    agentHost = conf.get("http.agent.host", "");
    authFile = conf.get("http.auth.file", "");

    configureClient();

    try {
      setCredentials();
    } catch (Exception failure) {
      // A broken auth file is reported but does not abort configuration.
      if (!LOG.isFatalEnabled()) {
        return;
      }
      LOG.fatal("Could not read " + authFile + " : " + failure.getMessage());
      failure.printStackTrace(LogUtil.getErrorStream(LOG));
    }
  }

  /**
   * Main method.
   *
   * @param args Command line arguments
   */
  public static void main(String[] args) throws Exception {
    // Configure a fresh plugin instance, then delegate to the
    // two-argument main(...) overload.
    final Http plugin = new Http();
    final Configuration nutchConf = NutchConfiguration.create();
    plugin.setConf(nutchConf);
    main(plugin, args);
  }

  /**
   * Fetches the <code>url</code> with a configured HTTP client and
   * gets the response.
   *
   * @param url       URL to be fetched
   * @param datum     Crawl data
   * @param redirect  Follow redirects if and only if true
   * @return          HTTP response
   */
  protected Response getResponse(URL url, CrawlDatum datum, boolean redirect)
    throws ProtocolException, IOException {
    // Register default credentials for this URL's host/port (if none are
    // configured yet) before the response object is built.
    resolveCredentials(url);
    final Response response = new HttpResponse(this, url, datum, redirect);
    return response;
  }

  /**
   * Configures the HTTP client
   */
  private void configureClient() {

    // Set up an HTTPS socket factory that accepts self-signed certs.
    Protocol https = new Protocol("https",
        new DummySSLProtocolSocketFactory(), 443);
    Protocol.registerProtocol("https", https);

    HttpConnectionManagerParams params = connectionManager.getParams();
    params.setConnectionTimeout(timeout);
    params.setSoTimeout(timeout);
    params.setSendBufferSize(BUFFER_SIZE);
    params.setReceiveBufferSize(BUFFER_SIZE);
    params.setMaxTotalConnections(maxThreadsTotal);
    if (maxThreadsTotal > maxThreadsPerHost) {
      params.setDefaultMaxConnectionsPerHost(maxThreadsPerHost);
    } else {
      params.setDefaultMaxConnectionsPerHost(maxThreadsTotal);
    }

    // executeMethod(HttpMethod) seems to ignore the connection timeout on the connection manager.
    // set it explicitly on the HttpClient.
    client.getParams().setConnectionManagerTimeout(timeout);

    HostConfiguration hostConf = client.getHostConfiguration();
    ArrayList headers = new ArrayList();
    // Set the User Agent in the header
    headers.add(new Header("User-Agent", userAgent));
    // prefer English
    headers.add(new Header("Accept-Language", acceptLanguage));
    // prefer UTF-8
    headers.add(new Header("Accept-Charset", "utf-8,ISO-8859-1;q=0.7,*;q=0.7"));
    // prefer understandable formats
    headers.add(new Header("Accept",
            "text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"));
    // accept gzipped content
              + url.getHost() + "; port: " + port
    headers.add(new Header("Accept-Encoding", "x-gzip, gzip, deflate"));
    hostConf.getParams().setParameter("http.default-headers", headers);

    // HTTP proxy server details
    if (useProxy) {
      hostConf.setProxy(proxyHost, proxyPort);

      if (proxyUsername.length() > 0) {

        AuthScope proxyAuthScope = getAuthScope(
            this.proxyHost, this.proxyPort, this.proxyRealm);

        NTCredentials proxyCredentials = new NTCredentials(
            this.proxyUsername, this.proxyPassword,
            this.agentHost, this.proxyRealm);

        client.getState().setProxyCredentials(
            proxyAuthScope, proxyCredentials);
      }
    }

  }

  /**
   * Reads authentication configuration file (defined as
   * 'http.auth.file' in Nutch configuration file) and sets the
   * credentials for the configured authentication scopes in the HTTP
   * client object.
   *
   * @throws ParserConfigurationException  If a document builder can not
   *                                       be created.
   * @throws SAXException                  If any parsing error occurs.
   * @throws IOException                   If any I/O error occurs.
   */
  private static synchronized void setCredentials() throws 
      ParserConfigurationException, SAXException, IOException {

    if (authRulesRead)
      return;

    authRulesRead = true; // Avoid re-attempting to read

    InputStream is = conf.getConfResourceAsInputStream(authFile);    
    if (is != null) {
      Document doc = DocumentBuilderFactory.newInstance()
                     .newDocumentBuilder().parse(is);

      Element rootElement = doc.getDocumentElement();
      if (!"auth-configuration".equals(rootElement.getTagName())) {
        if (LOG.isWarnEnabled())
          LOG.warn("Bad auth conf file: root element <"
              + rootElement.getTagName() + "> found in " + authFile
              + " - must be <auth-configuration>");
      }

      // For each set of credentials
      NodeList credList = rootElement.getChildNodes();
      for (int i = 0; i < credList.getLength(); i++) {
        Node credNode = credList.item(i);
        if (!(credNode instanceof Element))
          continue;    

        Element credElement = (Element) credNode;
        if (!"credentials".equals(credElement.getTagName())) {
          if (LOG.isWarnEnabled())
            LOG.warn("Bad auth conf file: Element <"
            + credElement.getTagName() + "> not recognized in "
            + authFile + " - expected <credentials>");
          continue;
        }

        String username = credElement.getAttribute("username");
        String password = credElement.getAttribute("password");

        // For each authentication scope
        NodeList scopeList = credElement.getChildNodes();
        for (int j = 0; j < scopeList.getLength(); j++) {
          Node scopeNode = scopeList.item(j);
          if (!(scopeNode instanceof Element))
            continue;
          
          Element scopeElement = (Element) scopeNode;

          if ("default".equals(scopeElement.getTagName())) {

            // Determine realm and scheme, if any
            String realm = scopeElement.getAttribute("realm");
            String scheme = scopeElement.getAttribute("scheme");

            // Set default credentials
            defaultUsername = username;
            defaultPassword = password;
            defaultRealm = realm;
            defaultScheme = scheme;

            if (LOG.isTraceEnabled()) {
   *
	 * @param port
              LOG.trace("Credentials - username: " + username 
                  + "; set as default"
                  + " for realm: " + realm + "; scheme: " + scheme);
            }

          } else if ("authscope".equals(scopeElement.getTagName())) {

            // Determine authentication scope details
            String host = scopeElement.getAttribute("host");
            int port = -1; // For setting port to AuthScope.ANY_PORT
            try {
              port = Integer.parseInt(
                  scopeElement.getAttribute("port"));
            } catch (Exception ex) {
              // do nothing, port is already set to any port
            }
            String realm = scopeElement.getAttribute("realm");
            String scheme = scopeElement.getAttribute("scheme");

            // Set credentials for the determined scope
            AuthScope authScope = getAuthScope(host, port, realm, scheme);
            NTCredentials credentials = new NTCredentials(
                username, password, agentHost, realm);

            client.getState().setCredentials(authScope, credentials);

            if (LOG.isTraceEnabled()) {
              LOG.trace("Credentials - username: " + username
                  + "; set for AuthScope - " + "host: " + host
                  + "; port: " + port + "; realm: " + realm
                  + "; scheme: " + scheme);
            }

          } else {
            if (LOG.isWarnEnabled())
              LOG.warn("Bad auth conf file: Element <"
                  + scopeElement.getTagName() + "> not recognized in "
                  + authFile + " - expected <authscope>");
          }
        }
        is.close();
      }
    }
  }

  /**
   * If credentials for the authentication scope determined from the
   * specified <code>url</code> is not already set in the HTTP client,
   * then this method sets the default credentials to fetch the
   * specified <code>url</code>. If credentials are found for the
   * authentication scope, the method returns without altering the
   * client.
   *
   * @param url URL to be fetched
   */
  private void resolveCredentials(URL url) {

    // Nothing to do unless default credentials were loaded from the auth file.
    if (defaultUsername != null && defaultUsername.length() > 0) {

      // URL.getPort() returns -1 when the URL names no port; fall back to
      // 443 for https and 80 for everything else.
      int port = url.getPort();
      if (port == -1) {
        if ("https".equals(url.getProtocol()))
          port = 443;
        else
          port = 80;
      }

      AuthScope scope = new AuthScope(url.getHost(), port);

      if (client.getState().getCredentials(scope) != null) {
        if (LOG.isTraceEnabled())
          LOG.trace("Pre-configured credentials with scope - host: "
              + url.getHost() + "; port: " + port
              + "; found for url: " + url);

        // Credentials are already configured, so do nothing and return
        return;
      }

      if (LOG.isTraceEnabled())
          LOG.trace("Pre-configured credentials with scope -  host: "
              + url.getHost() + "; port: " + port
              + "; not found for url: " + url);

      // Register the default credentials for this host/port, restricted to
      // the default realm and scheme.
      AuthScope serverAuthScope = getAuthScope(
          url.getHost(), port, defaultRealm, defaultScheme);

      NTCredentials serverCredentials = new NTCredentials(
          defaultUsername, defaultPassword,
          agentHost, defaultRealm);

      client.getState().setCredentials(
          serverAuthScope, serverCredentials);
    }
  }

  /**
   * Returns an authentication scope for the specified
   * <code>host</code>, <code>port</code>, <code>realm</code> and
   * <code>scheme</code>.
   *
   * @param host    Host name or address.
   * @param port    Port number.
   * @param realm   Authentication realm.
   * @param scheme  Authentication scheme.
   */
  private static AuthScope getAuthScope(String host, int port,
      String realm, String scheme) {

    // Empty strings are turned into null and negative ports into -1 before
    // the scope is built (presumably AuthScope's "any" wildcards - see the
    // AuthScope API). Null arguments are passed through unchanged instead
    // of failing on length().
    if (host != null && host.length() == 0)
      host = null;

    if (port < 0)
      port = -1;

    if (realm != null && realm.length() == 0)
      realm = null;

    if (scheme != null && scheme.length() == 0)
      scheme = null;

    return new AuthScope(host, port, realm, scheme);
  }

  /**
   * Returns an authentication scope for the specified
   * <code>host</code>, <code>port</code> and <code>realm</code>.
   *
   * @param host    Host name or address.
   * @param port    Port number.
   * @param realm   Authentication realm.
   */
  private static AuthScope getAuthScope(String host, int port,
      String realm) {

      // The four-argument overload converts an empty scheme to null.
      final String unspecifiedScheme = "";
      return getAuthScope(host, port, realm, unspecifiedScheme);
  }
}
=======
	// Logger for this plugin; also handed to the HttpBase constructor.
	public static final Log LOG = LogFactory.getLog(Http.class);

	// Connection manager backing the single static HttpClient below.
	private static MultiThreadedHttpConnectionManager connectionManager = new MultiThreadedHttpConnectionManager();

	// Since the Configuration has not yet been set,
	// then an unconfigured client is returned.
	private static HttpClient client = new HttpClient(connectionManager);
	// Default credentials and their realm/scheme; filled in by
	// setCredentials() from the "default" element of the auth file.
	private static String defaultUsername;
	private static String defaultPassword;
	private static String defaultRealm;
	private static String defaultScheme;
	// Values of "http.auth.file" and "http.agent.host", stored by setConf().
	private static String authFile;
	private static String agentHost;
	// Set to true on the first setCredentials() call so the auth file is
	// not read again.
	private static boolean authRulesRead = false;
	// Configuration most recently passed to setConf(); static, so it is
	// shared by all instances.
	private static Configuration conf;

	// Overwritten in setConf() with "fetcher.threads.fetch" (default 10).
	int maxThreadsTotal = 10;

	// Proxy credentials read in setConf() from the "http.proxy.*" keys.
	private String proxyUsername;
	private String proxyPassword;
	private String proxyRealm;

	// WebPage fields reported by getFields().
	private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();

	static {
		FIELDS.add(WebPage.Field.MODIFIED_TIME);
		FIELDS.add(WebPage.Field.HEADERS);
	}

	@Override
	public Collection<Field> getFields() {
		// Returns the static FIELDS set itself (MODIFIED_TIME and HEADERS),
		// not a copy.
		return FIELDS;
	}
	
	/**
	 * Returns the configured HTTP client.
	 * 
	 * @return HTTP client
	 */
	static synchronized HttpClient getClient() {
		// Hands out the single static client held by this class.
		return client;
	}

	/**
	 * Constructs this plugin.
	 */
	public Http() {
		// Pass this plugin's logger to the HttpBase constructor.
		super(LOG);
	}

	/**
	 * Reads the configuration from the Nutch configuration files and sets the
	 * configuration.
	 * 
	 * @param conf
	 *            Configuration
	 */
	public void setConf(Configuration conf) {
		super.setConf(conf);
		this.conf = conf;
		this.maxThreadsTotal = conf.getInt("fetcher.threads.fetch", 10);
		this.proxyUsername = conf.get("http.proxy.username", "");
		this.proxyPassword = conf.get("http.proxy.password", "");
		this.proxyRealm = conf.get("http.proxy.realm", "");
		agentHost = conf.get("http.agent.host", "");
		authFile = conf.get("http.auth.file", "");
		configureClient();
		try {
			setCredentials();
		} catch (Exception ex) {
			if (LOG.isFatalEnabled()) {
	 * @param realm
								.setCredentials(authScope, credentials);
				LOG.fatal("Could not read " + authFile + " : "
						+ ex.getMessage());
				ex.printStackTrace(LogUtil.getErrorStream(LOG));
			}
		}
	}

	/**
	 * Main method.
	 * 
	 * @param args
	 *            Command line arguments
	 */
	public static void main(String[] args) throws Exception {
		// Configure a fresh plugin instance, then delegate to the
		// two-argument main(...) overload.
		final Http plugin = new Http();
		final Configuration nutchConf = NutchConfiguration.create();
		plugin.setConf(nutchConf);
		main(plugin, args);
	}

	/**
	 * Fetches the <code>url</code> with a configured HTTP client and gets the
	 * response.
	 * 
	 * @param url
	 *            URL to be fetched
	 * @param page
	 *            Web page record handed on to the HTTP response
	 * @param redirect
	 *            Follow redirects if and only if true
	 * @return HTTP response
	 */
	protected Response getResponse(URL url, WebPage page, boolean redirect)
			throws ProtocolException, IOException {
		// Register default credentials for this URL's host/port (if none are
		// configured yet) before the response object is built.
		resolveCredentials(url);
		final Response response = new HttpResponse(this, url, page, redirect);
		return response;
	}

	/**
	 * Configures the HTTP client
	 */
	private void configureClient() {

		// Register an HTTPS protocol whose socket factory also accepts
		// self-signed certificates.
		Protocol.registerProtocol("https", new Protocol("https",
				new DummySSLProtocolSocketFactory(), 443));

		// Timeouts, buffer sizes and connection limits of the shared
		// connection manager.
		final HttpConnectionManagerParams managerParams = connectionManager
				.getParams();
		managerParams.setConnectionTimeout(timeout);
		managerParams.setSoTimeout(timeout);
		managerParams.setSendBufferSize(BUFFER_SIZE);
		managerParams.setReceiveBufferSize(BUFFER_SIZE);
		managerParams.setMaxTotalConnections(maxThreadsTotal);
		// The per-host limit is the smaller of the two thread limits.
		managerParams.setDefaultMaxConnectionsPerHost(Math.min(
				maxThreadsTotal, maxThreadsPerHost));

		// executeMethod(HttpMethod) seems to ignore the connection timeout
		// on the connection manager, so it is also set on the client.
		client.getParams().setConnectionManagerTimeout(timeout);

		// Default headers sent with every request: user agent, preferred
		// language, charset, content types and compressed encodings.
		final ArrayList defaultHeaders = new ArrayList();
		defaultHeaders.add(new Header("User-Agent", userAgent));
		defaultHeaders.add(new Header("Accept-Language",
				"en-us,en-gb,en;q=0.7,*;q=0.3"));
		defaultHeaders.add(new Header("Accept-Charset",
				"utf-8,ISO-8859-1;q=0.7,*;q=0.7"));
		defaultHeaders.add(new Header(
				"Accept",
				"text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"));
		defaultHeaders.add(new Header("Accept-Encoding",
				"x-gzip, gzip, deflate"));

		final HostConfiguration hostConfiguration = client
				.getHostConfiguration();
		hostConfiguration.getParams().setParameter("http.default-headers",
				defaultHeaders);

		// The remainder only applies when an HTTP proxy is configured.
		if (!useProxy) {
			return;
		}
		hostConfiguration.setProxy(proxyHost, proxyPort);

		// Proxy credentials are registered only when a user name is set.
		if (proxyUsername.length() == 0) {
			return;
		}

		final AuthScope proxyScope = getAuthScope(proxyHost, proxyPort,
				proxyRealm);
		final NTCredentials proxyLogin = new NTCredentials(proxyUsername,
				proxyPassword, agentHost, proxyRealm);
		client.getState().setProxyCredentials(proxyScope, proxyLogin);
	}

	/**
	 * Reads authentication configuration file (defined as 'http.auth.file' in
	 * Nutch configuration file) and sets the credentials for the configured
	 * authentication scopes in the HTTP client object.
	 * 
	 * @throws ParserConfigurationException
	 *             If a document builder can not be created.
	 * @throws SAXException
	 *             If any parsing error occurs.
	 * @throws IOException
	 *             If any I/O error occurs.
	 */
	private static synchronized void setCredentials()
			throws ParserConfigurationException, SAXException, IOException {

		if (authRulesRead)
			return;

		authRulesRead = true; // Avoid re-attempting to read

		InputStream is = conf.getConfResourceAsInputStream(authFile);
		if (is != null) {
			Document doc = DocumentBuilderFactory.newInstance()
					.newDocumentBuilder().parse(is);

			Element rootElement = doc.getDocumentElement();
			if (!"auth-configuration".equals(rootElement.getTagName())) {
				if (LOG.isWarnEnabled())
					LOG.warn("Bad auth conf file: root element <"
							+ rootElement.getTagName() + "> found in "
							+ authFile + " - must be <auth-configuration>");
			}

			// For each set of credentials
			NodeList credList = rootElement.getChildNodes();
			for (int i = 0; i < credList.getLength(); i++) {
				Node credNode = credList.item(i);
				if (!(credNode instanceof Element))
					continue;

				Element credElement = (Element) credNode;
				if (!"credentials".equals(credElement.getTagName())) {
					if (LOG.isWarnEnabled())
						LOG.warn("Bad auth conf file: Element <"
								+ credElement.getTagName()
								+ "> not recognized in " + authFile
								+ " - expected <credentials>");
					continue;
				}

				String username = credElement.getAttribute("username");
				String password = credElement.getAttribute("password");

				// For each authentication scope
				NodeList scopeList = credElement.getChildNodes();
				for (int j = 0; j < scopeList.getLength(); j++) {
					Node scopeNode = scopeList.item(j);
					if (!(scopeNode instanceof Element))
						continue;

					Element scopeElement = (Element) scopeNode;

					if ("default".equals(scopeElement.getTagName())) {

						// Determine realm and scheme, if any
						String realm = scopeElement.getAttribute("realm");
						String scheme = scopeElement.getAttribute("scheme");

						// Set default credentials
						defaultUsername = username;
						defaultPassword = password;
						defaultRealm = realm;
						defaultScheme = scheme;

						if (LOG.isTraceEnabled()) {
							LOG.trace("Credentials - username: " + username
									+ "; set as default" + " for realm: "
									+ realm + "; scheme: " + scheme);
						}

					} else if ("authscope".equals(scopeElement.getTagName())) {

						// Determine authentication scope details
						String host = scopeElement.getAttribute("host");
						int port = -1; // For setting port to AuthScope.ANY_PORT
						try {
							port = Integer.parseInt(scopeElement
									.getAttribute("port"));
						} catch (Exception ex) {
							// do nothing, port is already set to any port
						}
						String realm = scopeElement.getAttribute("realm");
						String scheme = scopeElement.getAttribute("scheme");

						// Set credentials for the determined scope
						AuthScope authScope = getAuthScope(host, port, realm,
								scheme);
						NTCredentials credentials = new NTCredentials(username,
								password, agentHost, realm);

						client.getState()
	 *            Port number.
						if (LOG.isTraceEnabled()) {
							LOG.trace("Credentials - username: " + username
									+ "; set for AuthScope - " + "host: "
									+ host + "; port: " + port + "; realm: "
									+ realm + "; scheme: " + scheme);
						}

					} else {
						if (LOG.isWarnEnabled())
							LOG.warn("Bad auth conf file: Element <"
									+ scopeElement.getTagName()
									+ "> not recognized in " + authFile
									+ " - expected <authscope>");
					}
				}
				is.close();
			}
		}
	}

	/**
	 * If credentials for the authentication scope determined from the specified
	 * <code>url</code> is not already set in the HTTP client, then this method
	 * sets the default credentials to fetch the specified <code>url</code>. If
	 * credentials are found for the authentication scope, the method returns
	 * without altering the client.
	 * 
	 * @param url
	 *            URL to be fetched
	 */
	private void resolveCredentials(URL url) {

		// Nothing to do unless default credentials were loaded from the
		// auth file.
		if (defaultUsername == null || defaultUsername.length() == 0) {
			return;
		}

		// URL.getPort() returns -1 when the URL names no port; fall back to
		// 443 for https and 80 for everything else.
		int port = url.getPort();
		if (port == -1) {
			port = "https".equals(url.getProtocol()) ? 443 : 80;
		}

		final AuthScope hostPortScope = new AuthScope(url.getHost(), port);
		final boolean alreadyConfigured = client.getState().getCredentials(
				hostPortScope) != null;

		if (alreadyConfigured) {
			if (LOG.isTraceEnabled()) {
				LOG.trace("Pre-configured credentials with scope - host: "
						+ url.getHost() + "; port: " + port
						+ "; found for url: " + url);
			}
			// Existing credentials win; leave the client untouched.
			return;
		}

		if (LOG.isTraceEnabled()) {
			LOG.trace("Pre-configured credentials with scope -  host: "
					+ url.getHost() + "; port: " + port
					+ "; not found for url: " + url);
		}

		// Register the default credentials for this host/port, restricted
		// to the default realm and scheme.
		final AuthScope defaultScope = getAuthScope(url.getHost(), port,
				defaultRealm, defaultScheme);
		final NTCredentials defaultLogin = new NTCredentials(
				defaultUsername, defaultPassword, agentHost, defaultRealm);
		client.getState().setCredentials(defaultScope, defaultLogin);
	}

	/**
	 * Returns an authentication scope for the specified <code>host</code>,
	 * <code>port</code>, <code>realm</code> and <code>scheme</code>.
	 * 
	 * @param host
	 *            Host name or address.
	 * @param port
	 *            Port number.
	 * @param realm
	 *            Authentication realm.
	 * @param scheme
	 *            Authentication scheme.
	 */
	private static AuthScope getAuthScope(String host, int port, String realm,
			String scheme) {

		// Empty strings are turned into null and negative ports into -1
		// before the scope is built (presumably AuthScope's "any" wildcards
		// - see the AuthScope API). Null arguments are passed through
		// unchanged instead of failing on length().
		if (host != null && host.length() == 0)
			host = null;

		if (port < 0)
			port = -1;

		if (realm != null && realm.length() == 0)
			realm = null;

		if (scheme != null && scheme.length() == 0)
			scheme = null;

		return new AuthScope(host, port, realm, scheme);
	}

	/**
	 * Returns an authentication scope for the specified <code>host</code>,
	 * <code>port</code> and <code>realm</code>.
	 * 
	 * @param host
	 *            Host name or address.
	 * @param port
	 *            Port number.
	 * @param realm
	 *            Authentication realm.
	 */
	private static AuthScope getAuthScope(String host, int port, String realm) {

		// The four-argument overload converts an empty scheme to null.
		final String unspecifiedScheme = "";
		return getAuthScope(host, port, realm, unspecifiedScheme);
	}
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

}</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre> */
public class Http extends HttpBase {

	// Logger for this plugin; also handed to the HttpBase constructor.
	public static final Log LOG = LogFactory.getLog(Http.class);

	// Connection manager backing the single static HttpClient below.
	private static MultiThreadedHttpConnectionManager connectionManager = new MultiThreadedHttpConnectionManager();

	// Since the Configuration has not yet been set,
	// then an unconfigured client is returned.
	private static HttpClient client = new HttpClient(connectionManager);
	// Default credentials and their realm/scheme; filled in by
	// setCredentials() from the "default" element of the auth file.
	private static String defaultUsername;
	private static String defaultPassword;
	private static String defaultRealm;
	private static String defaultScheme;
	// Values of "http.auth.file" and "http.agent.host", stored by setConf().
	private static String authFile;
	private static String agentHost;
	// Set to true on the first setCredentials() call so the auth file is
	// not read again.
	private static boolean authRulesRead = false;
	// Configuration most recently passed to setConf(); static, so it is
	// shared by all instances.
	private static Configuration conf;

	// Overwritten in setConf() with "fetcher.threads.fetch" (default 10).
	int maxThreadsTotal = 10;

	// Proxy credentials read in setConf() from the "http.proxy.*" keys.
	private String proxyUsername;
	private String proxyPassword;
	private String proxyRealm;

	// WebPage fields reported by getFields().
	private static final Collection<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();

	static {
		FIELDS.add(WebPage.Field.MODIFIED_TIME);
		FIELDS.add(WebPage.Field.HEADERS);
	}

	@Override
	public Collection<Field> getFields() {
		// Returns the static FIELDS set itself (MODIFIED_TIME and HEADERS),
		// not a copy.
		return FIELDS;
	}
	
	/**
	 * Returns the configured HTTP client.
	 * 
	 * @return HTTP client
	 */
	static synchronized HttpClient getClient() {
		// Hands out the single static client held by this class.
		return client;
	}

	/**
	 * Constructs this plugin.
	 */
	public Http() {
		// Pass this plugin's logger to the HttpBase constructor.
		super(LOG);
	}

	/**
	 * Reads the configuration from the Nutch configuration files and sets the
	 * configuration.
	 * 
	 * @param conf
	 *            Configuration
	 */
	public void setConf(Configuration conf) {
		super.setConf(conf);

		// "conf" is a static field: the most recent call wins for all
		// instances.
		Http.conf = conf;
		maxThreadsTotal = conf.getInt("fetcher.threads.fetch", 10);
		proxyUsername = conf.get("http.proxy.username", "");
		proxyPassword = conf.get("http.proxy.password", "");
		proxyRealm = conf.get("http.proxy.realm", "");
		agentHost = conf.get("http.agent.host", "");
		authFile = conf.get("http.auth.file", "");

		configureClient();

		try {
			setCredentials();
		} catch (Exception failure) {
			// A broken auth file is reported but does not abort
			// configuration.
			if (!LOG.isFatalEnabled()) {
				return;
			}
			LOG.fatal("Could not read " + authFile + " : "
					+ failure.getMessage());
			failure.printStackTrace(LogUtil.getErrorStream(LOG));
		}
	}

	/**
	 * Main method.
	 * 
	 * @param args
	 *            Command line arguments
	 */
	public static void main(String[] args) throws Exception {
		// Configure a fresh plugin instance, then delegate to the
		// two-argument main(...) overload.
		final Http plugin = new Http();
		final Configuration nutchConf = NutchConfiguration.create();
		plugin.setConf(nutchConf);
		main(plugin, args);
	}

	/**
	 * Fetches the <code>url</code> with a configured HTTP client and gets the
	 * response.
	 * 
	 * @param url
	 *            URL to be fetched
	 * @param page
	 *            Web page record handed on to the HTTP response
	 * @param redirect
	 *            Follow redirects if and only if true
	 * @return HTTP response
	 */
	protected Response getResponse(URL url, WebPage page, boolean redirect)
			throws ProtocolException, IOException {
		// Register default credentials for this URL's host/port (if none are
		// configured yet) before the response object is built.
		resolveCredentials(url);
		final Response response = new HttpResponse(this, url, page, redirect);
		return response;
	}

	/**
	 * Configures the HTTP client
	 */
	private void configureClient() {

		// Set up an HTTPS socket factory that accepts self-signed certs.
		Protocol https = new Protocol("https",
				new DummySSLProtocolSocketFactory(), 443);
		Protocol.registerProtocol("https", https);

		HttpConnectionManagerParams params = connectionManager.getParams();
		params.setConnectionTimeout(timeout);
		params.setSoTimeout(timeout);
				}
		params.setSendBufferSize(BUFFER_SIZE);
		params.setReceiveBufferSize(BUFFER_SIZE);
		params.setMaxTotalConnections(maxThreadsTotal);
		if (maxThreadsTotal > maxThreadsPerHost) {
			params.setDefaultMaxConnectionsPerHost(maxThreadsPerHost);
		} else {
			params.setDefaultMaxConnectionsPerHost(maxThreadsTotal);
		}

		// executeMethod(HttpMethod) seems to ignore the connection timeout on
		// the connection manager.
		// set it explicitly on the HttpClient.
		client.getParams().setConnectionManagerTimeout(timeout);

		HostConfiguration hostConf = client.getHostConfiguration();
		ArrayList headers = new ArrayList();
		// Set the User Agent in the header
		headers.add(new Header("User-Agent", userAgent));
		// prefer English
		headers.add(new Header("Accept-Language",
				"en-us,en-gb,en;q=0.7,*;q=0.3"));
		// prefer UTF-8
		headers.add(new Header("Accept-Charset",
				"utf-8,ISO-8859-1;q=0.7,*;q=0.7"));
		// prefer understandable formats
		headers.add(new Header(
				"Accept",
				"text/html,application/xml;q=0.9,application/xhtml+xml,text/xml;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"));
		// accept gzipped content
		headers.add(new Header("Accept-Encoding", "x-gzip, gzip, deflate"));
		hostConf.getParams().setParameter("http.default-headers", headers);

		// HTTP proxy server details
		if (useProxy) {
			hostConf.setProxy(proxyHost, proxyPort);

			if (proxyUsername.length() > 0) {

				AuthScope proxyAuthScope = getAuthScope(this.proxyHost,
						this.proxyPort, this.proxyRealm);

				NTCredentials proxyCredentials = new NTCredentials(
						this.proxyUsername, this.proxyPassword, this.agentHost,
						this.proxyRealm);

				client.getState().setProxyCredentials(proxyAuthScope,
						proxyCredentials);
			}
		}

	}

	/**
	 * Reads authentication configuration file (defined as 'http.auth.file' in
	 * Nutch configuration file) and sets the credentials for the configured
	 * authentication scopes in the HTTP client object.
	 * 
	 * @throws ParserConfigurationException
	 *             If a document builder can not be created.
	 * @throws SAXException
	 *             If any parsing error occurs.
	 * @throws IOException
	 *             If any I/O error occurs.
	 */
	private static synchronized void setCredentials()
			throws ParserConfigurationException, SAXException, IOException {

		if (authRulesRead)
			return;

		authRulesRead = true; // Avoid re-attempting to read

		InputStream is = conf.getConfResourceAsInputStream(authFile);
		if (is != null) {
			Document doc = DocumentBuilderFactory.newInstance()
					.newDocumentBuilder().parse(is);

			Element rootElement = doc.getDocumentElement();
			if (!"auth-configuration".equals(rootElement.getTagName())) {
				if (LOG.isWarnEnabled())
					LOG.warn("Bad auth conf file: root element <"
							+ rootElement.getTagName() + "> found in "
							+ authFile + " - must be <auth-configuration>");
			}

			// For each set of credentials
			NodeList credList = rootElement.getChildNodes();
			for (int i = 0; i < credList.getLength(); i++) {
				Node credNode = credList.item(i);
				if (!(credNode instanceof Element))
					continue;

				Element credElement = (Element) credNode;
				if (!"credentials".equals(credElement.getTagName())) {
					if (LOG.isWarnEnabled())
						LOG.warn("Bad auth conf file: Element <"
								+ credElement.getTagName()
								+ "> not recognized in " + authFile

				String username = credElement.getAttribute("username");
				String password = credElement.getAttribute("password");

				// For each authentication scope
				NodeList scopeList = credElement.getChildNodes();
				for (int j = 0; j < scopeList.getLength(); j++) {
					Node scopeNode = scopeList.item(j);
					if (!(scopeNode instanceof Element))
						continue;

					Element scopeElement = (Element) scopeNode;

					if ("default".equals(scopeElement.getTagName())) {

						// Determine realm and scheme, if any
						String realm = scopeElement.getAttribute("realm");
						String scheme = scopeElement.getAttribute("scheme");

						// Set default credentials
						defaultUsername = username;
						defaultPassword = password;
						defaultRealm = realm;
						defaultScheme = scheme;

						if (LOG.isTraceEnabled()) {
							LOG.trace("Credentials - username: " + username
									+ "; set as default" + " for realm: "
									+ realm + "; scheme: " + scheme);
						}

					} else if ("authscope".equals(scopeElement.getTagName())) {

						// Determine authentication scope details
						String host = scopeElement.getAttribute("host");
						int port = -1; // For setting port to AuthScope.ANY_PORT
						try {
							port = Integer.parseInt(scopeElement
									.getAttribute("port"));
						} catch (Exception ex) {
							// do nothing, port is already set to any port
						}
						String realm = scopeElement.getAttribute("realm");
						String scheme = scopeElement.getAttribute("scheme");

						// Set credentials for the determined scope
						AuthScope authScope = getAuthScope(host, port, realm,
								scheme);
						NTCredentials credentials = new NTCredentials(username,
								password, agentHost, realm);

						client.getState()
								.setCredentials(authScope, credentials);

						if (LOG.isTraceEnabled()) {
							LOG.trace("Credentials - username: " + username
									+ "; set for AuthScope - " + "host: "
									+ host + "; port: " + port + "; realm: "
									+ realm + "; scheme: " + scheme);
						}

					} else {
						if (LOG.isWarnEnabled())
							LOG.warn("Bad auth conf file: Element <"
									+ scopeElement.getTagName()
									+ "> not recognized in " + authFile
									+ " - expected <authscope>");
					}
				}
				is.close();
			}
		}
	}

	/**
	 * If credentials for the authentication scope determined from the specified
	 * <code>url</code> is not already set in the HTTP client, then this method
	 * sets the default credentials to fetch the specified <code>url</code>. If
	 * credentials are found for the authentication scope, the method returns
	 * without altering the client.
	 * 
	 * @param url
	 *            URL to be fetched
	 */
	private void resolveCredentials(URL url) {

		// Without a configured default username there is nothing to apply.
		if (defaultUsername == null || defaultUsername.length() == 0) {
			return;
		}

		final String host = url.getHost();

		// A port of -1 means the URL carries no explicit port; fall back to
		// 443 for https and 80 for everything else.
		int port = url.getPort();
		if (port == -1) {
			port = "https".equals(url.getProtocol()) ? 443 : 80;
		}

		// Look up credentials registered for exactly this host:port scope.
		final boolean preConfigured = client.getState().getCredentials(
				new AuthScope(host, port)) != null;

		if (LOG.isTraceEnabled()) {
			if (preConfigured) {
				LOG.trace("Pre-configured credentials with scope - host: "
						+ host + "; port: " + port
						+ "; found for url: " + url);
			} else {
				LOG.trace("Pre-configured credentials with scope -  host: "
						+ host + "; port: " + port
						+ "; not found for url: " + url);
			}
		}

		// Credentials are already configured, so leave the client untouched.
		if (preConfigured) {
			return;
		}

		// Register the default credentials for this server, scoped by the
		// default realm and scheme.
		final AuthScope defaultScope = getAuthScope(host, port, defaultRealm,
				defaultScheme);
		final NTCredentials defaultCredentials = new NTCredentials(
				defaultUsername, defaultPassword, agentHost, defaultRealm);

		client.getState().setCredentials(defaultScope, defaultCredentials);
	}

	/**
	 * Returns an authentication scope for the specified <code>host</code>,
	 * <code>port</code>, <code>realm</code> and <code>scheme</code>.
	 * 
	 * @param host
	 *            Host name or address.
	 * @param port
	 *            Port number.
	 * @param realm
	 *            Authentication realm.
	 * @param scheme
	 *            Authentication scheme.
	 */
	private static AuthScope getAuthScope(String host, int port, String realm,
			String scheme) {

		// Empty strings become null and any negative port becomes -1 before
		// the scope is built (presumably AuthScope's "match any" values --
		// confirm against the HttpClient documentation).
		final String scopeHost = host.length() == 0 ? null : host;
		final int scopePort = port < 0 ? -1 : port;
		final String scopeRealm = realm.length() == 0 ? null : realm;
		final String scopeScheme = scheme.length() == 0 ? null : scheme;

		return new AuthScope(scopeHost, scopePort, scopeRealm, scopeScheme);
	}

	/**
	 * Returns an authentication scope for the specified <code>host</code>,
	 * <code>port</code> and <code>realm</code>.
	 * 
	 * @param host
	 *            Host name or address.
	 * @param port
	 *            Port number.
	 * @param realm
	 *            Authentication realm.
	 */
	private static AuthScope getAuthScope(String host, int port, String realm) {
		// Delegate to the four-argument overload with an empty scheme.
		final String unspecifiedScheme = "";
		return getAuthScope(host, port, realm, unspecifiedScheme);
	}

}</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">Http.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Annotation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Attribute</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Comment</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method declaration</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Static initializer</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>import java.net.URL;

import junit.framework.TestCase;
<<<<<<< HEAD
import org.mortbay.jetty.Server;
import org.mortbay.jetty.bio.SocketConnector;
import org.mortbay.jetty.handler.ContextHandler;
import org.mortbay.jetty.handler.ResourceHandler;
import org.mortbay.jetty.servlet.ServletHandler;
import org.mortbay.jetty.servlet.SessionHandler;
=======

>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.storage.WebPage;</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>import java.net.URL;

import junit.framework.TestCase;

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.storage.WebPage;</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestProtocolHttpClient.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Import</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>    startServer(47500);
	 */
 */
public class TestProtocolHttpClient extends TestCase {

<<<<<<< HEAD
  private Server server;
  private Configuration conf;
  private static final String RES_DIR = System.getProperty("test.data", ".");
  private int port;
  private Http http = new Http();

  protected void setUp() throws Exception {

    ContextHandler context = new ContextHandler();
    context.setContextPath("/");
    context.setResourceBase(RES_DIR);
    ServletHandler sh = new ServletHandler();
    sh.addServlet("org.apache.jasper.servlet.JspServlet", "*.jsp");
    context.addHandler(sh);
    context.addHandler(new SessionHandler());

    server = new Server();
    server.addHandler(context);

    conf = new Configuration();
    conf.addResource("nutch-default.xml");
    conf.addResource("nutch-site-test.xml");
    
    http = new Http();
    http.setConf(conf);
  }

  protected void tearDown() throws Exception {
    server.stop();
  }

  /**
   * Tests whether the client can remember cookies.
   *
   * @throws Exception If an error occurs or the test case fails.
   */
  public void testCookies() throws Exception {
    startServer(47500);
    fetchPage("/cookies.jsp", 200);
    fetchPage("/cookies.jsp?cookie=yes", 200);
  }

  /**
   * Tests that no pre-emptive authorization headers are sent by the
   * client.
   *
   * @throws Exception If an error occurs or the test case fails.
   */
  public void testNoPreemptiveAuth() throws Exception {
    fetchPage("/noauth.jsp", 200);
  }

  /**
   * Tests default credentials.
   *
   * @throws Exception If an error occurs or the test case fails.
   */
  public void testDefaultCredentials() throws Exception {
    startServer(47502);
    fetchPage("/basic.jsp", 200);
  }

  /**
   * Tests basic authentication scheme for various realms.
   * 
   * @throws Exception If an error occurs or the test case fails.
   */
  public void testBasicAuth() throws Exception {
    startServer(47500);
    fetchPage("/basic.jsp", 200);
    fetchPage("/basic.jsp?case=1", 200);
    fetchPage("/basic.jsp?case=2", 200);
    server.start();
  }

  /**
   * Tests that authentication happens for a defined realm and not for
	public void testDigestAuth() throws Exception {
		startServer(47500);
   * other realms for a host:port when an extra <code>authscope</code>
   * tag is not defined to match all other realms.
   *
   * @throws Exception If an error occurs or the test case fails.
   */
  public void testOtherRealmsNoAuth() throws Exception {
    startServer(47501);
    fetchPage("/basic.jsp", 200);
    fetchPage("/basic.jsp?case=1", 401);
    fetchPage("/basic.jsp?case=2", 401);
  }

  /**
   * Tests Digest authentication scheme.
   *
   * @throws Exception If an error occurs or the test case fails.
   */
  public void testDigestAuth() throws Exception {
    startServer(47500);
    fetchPage("/digest.jsp", 200);
  }

  /**
   * Tests NTLM authentication scheme.
   *
   * @throws Exception If an error occurs or the test case fails.
   */
  public void testNtlmAuth() throws Exception {
    startServer(47501);
    fetchPage("/ntlm.jsp", 200);
  }

  /**
   * Starts the Jetty server at a specified port.
   *
   * @param  portno     Port number.
   * @throws Exception  When an error occurs.
   */
  private void startServer(int portno) throws Exception {
    port = portno;
    SocketConnector listener = new SocketConnector();
    listener.setHost("127.0.0.1");
    listener.setPort(port);
    server.addConnector(listener);
    server.start();
  }

  /**
   * Fetches the specified <code>page</code> from the local Jetty server
   * and checks whether the HTTP response status code matches with the
   * expected code.
   *
   * @param page          Page to be fetched.
   * @param expectedCode  HTTP response status code expected while
   *                      fetching the page.
   * @throws Exception    When an error occurs or test case fails.
   */
  private void fetchPage(String page, int expectedCode)
      throws Exception {
    URL url = new URL("http", "127.0.0.1", port, page);
    Response response = null;
    response = http.getResponse(url, new CrawlDatum(), true);

    int code = response.getCode();
    assertEquals("HTTP Status Code for " + url, expectedCode, code);
  }
  
  /**
   * Returns an URL to the specified page.
   *
   * @param  page                  Page available in the local Jetty
   *                               server.
   * @throws MalformedURLException If an URL can not be formed.
   */
  private URL getURL(String page) throws MalformedURLException {
    return new URL("http", "127.0.0.1", port, page);
  }
=======
	private Server server;
	private Configuration conf;
	private static final String RES_DIR = System.getProperty("test.data", ".");
	private int port;
	private Http http = new Http();

	protected void setUp() throws Exception {

		server = new Server();
		
//		Context scontext = new Context();
//		scontext.setContextPath("/");
//		scontext.setResourceBase(RES_DIR);
//		// servlet handler?
//		scontext.addServlet("JSP", "*.jsp",
//				"org.apache.jasper.servlet.JspServlet");
//		scontext.addHandler(new ResourceHandler());

		Context root = new Context(server,"/",Context.SESSIONS);
		root.setContextPath("/");
		root.setResourceBase(RES_DIR);
		ServletHolder sh = new ServletHolder(org.apache.jasper.servlet.JspServlet.class);
		root.addServlet(sh, "*.jsp");

		conf = new Configuration();
		conf.addResource("nutch-default.xml");
		conf.addResource("nutch-site-test.xml");

		http = new Http();
		http.setConf(conf);
	}

	protected void tearDown() throws Exception {
		server.stop();
	}

	/**
	 * Tests whether the client can remember cookies.
	 * 
	 * @throws Exception
	 *             If an error occurs or the test case fails.
	 */
	public void testCookies() throws Exception {
		startServer(47500);
		fetchPage("/cookies.jsp", 200);
		fetchPage("/cookies.jsp?cookie=yes", 200);
	}

	/**
	 * Tests that no pre-emptive authorization headers are sent by the client.
	 * 
	 * @throws Exception
	 *             If an error occurs or the test case fails.
	 */
	public void testNoPreemptiveAuth() throws Exception {
		startServer(47500);
		fetchPage("/noauth.jsp", 200);
	}

	/**
	 * Tests default credentials.
	 * 
	 * @throws Exception
	 *             If an error occurs or the test case fails.
	 */
	public void testDefaultCredentials() throws Exception {
		startServer(47502);
		fetchPage("/basic.jsp", 200);
	}

	/**
	 * Tests basic authentication scheme for various realms.
	 * 
	 * @throws Exception
	 *             If an error occurs or the test case fails.
	 */
	public void testBasicAuth() throws Exception {
		startServer(47500);
		fetchPage("/basic.jsp", 200);
		fetchPage("/basic.jsp?case=1", 200);
		fetchPage("/basic.jsp?case=2", 200);
		server.start();
	}

	/**
	 * Tests that authentication happens for a defined realm and not for other
	 * realms for a host:port when an extra <code>authscope</code> tag is not
	 * defined to match all other realms.
	 * 
	 * @throws Exception
	 *             If an error occurs or the test case fails.
	 */
	public void testOtherRealmsNoAuth() throws Exception {
		startServer(47501);
		fetchPage("/basic.jsp", 200);
		fetchPage("/basic.jsp?case=1", 401);
		fetchPage("/basic.jsp?case=2", 401);
	}

	/**
	 * Tests Digest authentication scheme.
	 * 
	 * @throws Exception
	 *             If an error occurs or the test case fails.
		fetchPage("/digest.jsp", 200);
	}

	/**
	 * Tests NTLM authentication scheme.
	 * 
	 * @throws Exception
	 *             If an error occurs or the test case fails.
	 */
	public void testNtlmAuth() throws Exception {
		startServer(47501);
		fetchPage("/ntlm.jsp", 200);
	}

	/**
	 * Starts the Jetty server at a specified port.
	 * 
	 * @param portno
	 *            Port number.
	 * @throws Exception
	 *             When an error occurs.
	 */
	private void startServer(int portno) throws Exception {
		port = portno;

		SelectChannelConnector connector1 = new SelectChannelConnector();
		connector1.setHost("127.0.0.1");
		connector1.setPort(port);

		server.addConnector(connector1);
		server.start();
	}

	/**
	 * Fetches the specified <code>page</code> from the local Jetty server and
	 * checks whether the HTTP response status code matches with the expected
	 * code.
	 * 
	 * @param page
	 *            Page to be fetched.
	 * @param expectedCode
	 *            HTTP response status code expected while fetching the page.
	 * @throws Exception
	 *             When an error occurs or test case fails.
	 */
	private void fetchPage(String page, int expectedCode) throws Exception {
		URL url = new URL("http", "127.0.0.1", port, page);
		Response response = null;
		response = http.getResponse(url, new WebPage(), true);

		int code = response.getCode();
		assertEquals("HTTP Status Code for " + url, expectedCode, code);
	}

	/**
	 * Returns an URL to the specified page.
	 * 
	 * @param page
	 *            Page available in the local Jetty server.
	 * @throws MalformedURLException
	 *             If an URL can not be formed.
	 */
	private URL getURL(String page) throws MalformedURLException {
		return new URL("http", "127.0.0.1", port, page);
	}
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
}</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre> */
public class TestProtocolHttpClient extends TestCase {

	private Server server;
	private Configuration conf;
	private static final String RES_DIR = System.getProperty("test.data", ".");
	private int port;
	private Http http = new Http();

	protected void setUp() throws Exception {

		// The server is created here; startServer(int) attaches a connector
		// and starts it.
		server = new Server();

//		Context scontext = new Context();
//		scontext.setContextPath("/");
//		scontext.setResourceBase(RES_DIR);
//		// servlet handler?
//		scontext.addServlet("JSP", "*.jsp",
//				"org.apache.jasper.servlet.JspServlet");
//		scontext.addHandler(new ResourceHandler());

		// Root context (created with Context.SESSIONS) that serves RES_DIR
		// and maps *.jsp to the Jasper JSP servlet.
		final Context rootContext = new Context(server, "/", Context.SESSIONS);
		rootContext.setContextPath("/");
		rootContext.setResourceBase(RES_DIR);
		rootContext.addServlet(new ServletHolder(
				org.apache.jasper.servlet.JspServlet.class), "*.jsp");

		// Configuration for the protocol plugin under test.
		conf = new Configuration();
		conf.addResource("nutch-default.xml");
		conf.addResource("nutch-site-test.xml");

		// Fresh Http instance wired to that configuration.
		http = new Http();
		http.setConf(conf);
	}

	}
	protected void tearDown() throws Exception {
		server.stop();

	/**
	 * Tests whether the client can remember cookies.
	 * 
	 * @throws Exception
	 *             If an error occurs or the test case fails.
	 */
	public void testCookies() throws Exception {
		final String cookiePage = "/cookies.jsp";

		startServer(47500);

		// Two consecutive fetches of the same page; both must answer 200.
		fetchPage(cookiePage, 200);
		fetchPage(cookiePage + "?cookie=yes", 200);
	}

	/**
	 * Tests that no pre-emptive authorization headers are sent by the client.
	 * 
	 * @throws Exception
	 *             If an error occurs or the test case fails.
	 */
	public void testNoPreemptiveAuth() throws Exception {
		final int serverPort = 47500;
		final int expectedStatus = 200;

		startServer(serverPort);
		fetchPage("/noauth.jsp", expectedStatus);
	}

	/**
	 * Tests default credentials.
	 * 
	 * @throws Exception
	 *             If an error occurs or the test case fails.
	 */
	public void testDefaultCredentials() throws Exception {
		final int serverPort = 47502;
		final int expectedStatus = 200;

		startServer(serverPort);
		fetchPage("/basic.jsp", expectedStatus);
	}

	/**
	 * Tests basic authentication scheme for various realms.
	 * 
	 * @throws Exception
	 *             If an error occurs or the test case fails.
	 */
	public void testBasicAuth() throws Exception {
		final String basicPage = "/basic.jsp";
		final int expectedStatus = 200;

		startServer(47500);

		// Every variant of the page is expected to be fetched successfully.
		fetchPage(basicPage, expectedStatus);
		fetchPage(basicPage + "?case=1", expectedStatus);
		fetchPage(basicPage + "?case=2", expectedStatus);

		// NOTE(review): startServer(int) has already started the server, so
		// this second start() looks redundant -- confirm before removing.
		server.start();
	}

	/**
	 * Tests that authentication happens for a defined realm and not for other
	 * realms for a host:port when an extra <code>authscope</code> tag is not
	 * defined to match all other realms.
	 * 
	 * @throws Exception
	 *             If an error occurs or the test case fails.
	 */
	public void testOtherRealmsNoAuth() throws Exception {
		final String basicPage = "/basic.jsp";
		final int unauthorized = 401;

		startServer(47501);

		// The plain page must succeed; the two "case" variants must be
		// rejected with 401.
		fetchPage(basicPage, 200);
		fetchPage(basicPage + "?case=1", unauthorized);
		fetchPage(basicPage + "?case=2", unauthorized);
	}

	/**
	 * Tests Digest authentication scheme.
	 * 
	 * @throws Exception
	 *             If an error occurs or the test case fails.
	 */
	public void testDigestAuth() throws Exception {
		final int serverPort = 47500;
		final int expectedStatus = 200;

		startServer(serverPort);
		fetchPage("/digest.jsp", expectedStatus);
	}

	/**
	 * Tests NTLM authentication scheme.
	 * 
	 * @throws Exception
	 *             If an error occurs or the test case fails.
	 */
	public void testNtlmAuth() throws Exception {
		final int serverPort = 47501;
		final int expectedStatus = 200;

		startServer(serverPort);
		fetchPage("/ntlm.jsp", expectedStatus);
	}

	/**
	 * Starts the Jetty server at a specified port.
	 * 
	 * @param portno
	 *            Port number.
	 * @throws Exception
	 *             When an error occurs.
	 */
	private void startServer(int portno) throws Exception {
		// Remember the port so fetchPage/getURL can build matching URLs.
		port = portno;

		// Listen on the loopback address only, at the requested port.
		final SelectChannelConnector loopbackConnector = new SelectChannelConnector();
		loopbackConnector.setHost("127.0.0.1");
		loopbackConnector.setPort(port);
		server.addConnector(loopbackConnector);

		server.start();
	}

	/**
	 * Fetches the specified <code>page</code> from the local Jetty server and
	 * checks whether the HTTP response status code matches with the expected
	 * code.
	 * 
	 * @param page
	 *            Page to be fetched.
	 * @param expectedCode
	 *            HTTP response status code expected while fetching the page.
	 * @throws Exception
	 *             When an error occurs or test case fails.
	 */
	private void fetchPage(String page, int expectedCode) throws Exception {
		// Target the local test server on the port set by startServer(int).
		final URL target = new URL("http", "127.0.0.1", port, page);

		// Fetch through the Http instance under test with a fresh WebPage.
		final Response fetched = http.getResponse(target, new WebPage(), true);
		final int actualCode = fetched.getCode();

		assertEquals("HTTP Status Code for " + target, expectedCode, actualCode);
	}

	/**
	 * Returns an URL to the specified page.
	 * 
	 * @param page
	 *            Page available in the local Jetty server.
	 * @throws MalformedURLException
	 *             If an URL can not be formed.
	 */
	private URL getURL(String page) throws MalformedURLException {
		// Same host and port that startServer(int) binds the test server to.
		final URL pageUrl = new URL("http", "127.0.0.1", port, page);
		return pageUrl;
	}
}</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestProtocolHttpClient.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Attribute</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Comment</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method declaration</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method invocation</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
<<<<<<< HEAD

import org.apache.nutch.collection.CollectionManager;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
=======
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.storage.WebPage.Field;
import org.apache.nutch.util.NutchConfiguration;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

public class SubcollectionIndexingFilter extends Configured implements
		IndexingFilter {</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.storage.WebPage.Field;
import org.apache.nutch.util.NutchConfiguration;

public class SubcollectionIndexingFilter extends Configured implements
		IndexingFilter {</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">SubcollectionIndexingFilter.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Import</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>		doc.add(FIELD_NAME, collname);
	}

<<<<<<< HEAD
  public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks) throws IndexingException {
    String sUrl = url.toString();
    addSubCollectionField(doc, sUrl);
    return doc;
  }
=======
	@Override
	public Collection<Field> getFields() {
		return new ArrayList<Field>();
	}

	@Override
	public NutchDocument filter(NutchDocument doc, String url, WebPage page)
			throws IndexingException {
		addSubCollectionField(doc, url);
		return doc;
	}
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
}</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>		doc.add(FIELD_NAME, collname);
	}

	@Override
	public Collection<Field> getFields() {
		return new ArrayList<Field>();
	}

	@Override
	public NutchDocument filter(NutchDocument doc, String url, WebPage page)
			throws IndexingException {
		addSubCollectionField(doc, url);
		return doc;
	}
}</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">SubcollectionIndexingFilter.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Annotation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method declaration</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
<<<<<<< HEAD
import org.apache.nutch.parse.Parse;
=======
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.storage.WebPage.Field;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
import org.apache.nutch.util.URLUtil;
import org.apache.nutch.util.domain.DomainSuffix;
</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.storage.WebPage.Field;
import org.apache.nutch.util.URLUtil;
import org.apache.nutch.util.domain.DomainSuffix;
</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TLDIndexingFilter.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Import</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>  public Configuration getConf() {
    return this.conf;
  }
<<<<<<< HEAD
=======

  @Override
  public Collection<Field> getFields() {
    return fields;
  }
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
}</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>  public Configuration getConf() {
    return this.conf;
  }

  @Override
  public Collection<Field> getFields() {
    return fields;
  }
}</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TLDIndexingFilter.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Annotation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method declaration</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.indexer.NutchDocument;
<<<<<<< HEAD
import org.apache.nutch.indexer.NutchField;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.protocol.Content;
=======
import org.apache.nutch.scoring.ScoreDatum;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
import org.apache.nutch.scoring.ScoringFilter;
import org.apache.nutch.scoring.ScoringFilterException;
import org.apache.nutch.storage.WebPage;</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.scoring.ScoreDatum;
import org.apache.nutch.scoring.ScoringFilter;
import org.apache.nutch.scoring.ScoringFilterException;
import org.apache.nutch.storage.WebPage;</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TLDScoringFilter.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Import</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>
	@Override
 */
public class TLDScoringFilter implements ScoringFilter {

<<<<<<< HEAD
  private Configuration conf;
  private DomainSuffixes tldEntries;

  public TLDScoringFilter() {
    tldEntries = DomainSuffixes.getInstance();
  }

  public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum,
      CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore)
      throws ScoringFilterException {

    NutchField tlds = doc.getField("tld");
    float boost = 1.0f;

    if(tlds != null) {
      for(Object tld : tlds.getValues()) {
        DomainSuffix entry = tldEntries.get(tld.toString());
        if(entry != null)
          boost *= entry.getBoost();
      }
    }
    return initScore * boost;
  }

  public CrawlDatum distributeScoreToOutlink(Text fromUrl, Text toUrl,
      ParseData parseData, CrawlDatum target, CrawlDatum adjust, int allCount,
      int validCount) throws ScoringFilterException {
    return adjust;
  }

  public float generatorSortValue(Text url, CrawlDatum datum, float initSort)
      throws ScoringFilterException {
    return initSort;
  }

  public void initialScore(Text url, CrawlDatum datum)
      throws ScoringFilterException {
  }

  public void injectedScore(Text url, CrawlDatum datum)
      throws ScoringFilterException {
  }

  public void passScoreAfterParsing(Text url, Content content, Parse parse)
      throws ScoringFilterException {
  }

  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content)
      throws ScoringFilterException {
  }

  public void updateDbScore(Text url, CrawlDatum old,
                            CrawlDatum datum, List<CrawlDatum> inlinked)
  throws ScoringFilterException {
  }

  public Configuration getConf() {
    return conf;
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
  }
  public CrawlDatum distributeScoreToOutlinks(Text fromUrl, ParseData parseData, 
          Collection<Entry<Text, CrawlDatum>> targets, CrawlDatum adjust,
          int allCount) throws ScoringFilterException {
    return adjust;
  }
=======
	private Configuration conf;
	private DomainSuffixes tldEntries;

	private final static Set<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();

	public TLDScoringFilter() {
		tldEntries = DomainSuffixes.getInstance();
	}

	public Configuration getConf() {
		return conf;
	}

	public void setConf(Configuration conf) {
		this.conf = conf;
	}

	@Override
	public Collection<WebPage.Field> getFields() {
		return FIELDS;
	}

	@Override
	public void injectedScore(String url, WebPage page)
			throws ScoringFilterException {
	}
	public void initialScore(String url, WebPage page)
			throws ScoringFilterException {

	}

	@Override
	public float generatorSortValue(String url, WebPage page, float initSort)
			throws ScoringFilterException {
		return initSort;
	}

	@Override
	public void distributeScoreToOutlinks(String fromUrl, WebPage page,
			Collection<ScoreDatum> scoreData, int allCount)
			throws ScoringFilterException {
	}

	@Override
	public void updateScore(String url, WebPage page,
			List<ScoreDatum> inlinkedScoreData) throws ScoringFilterException {
	}

	@Override
	public float indexerScore(String url, NutchDocument doc, WebPage page,
			float initScore) throws ScoringFilterException {
		List<String> tlds = doc.getFieldValues("tld");
		float boost = 1.0f;

		if (tlds != null) {
			for (String tld : tlds) {
				DomainSuffix entry = tldEntries.get(tld);
				if (entry != null)
					boost *= entry.getBoost();
			}
		}
		return initScore * boost;
	}
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

}</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre> */
public class TLDScoringFilter implements ScoringFilter {

	private Configuration conf;
	private DomainSuffixes tldEntries;

	private final static Set<WebPage.Field> FIELDS = new HashSet<WebPage.Field>();

	public TLDScoringFilter() {
		tldEntries = DomainSuffixes.getInstance();
	}

	public Configuration getConf() {
		return conf;
	}

	public void setConf(Configuration conf) {
		this.conf = conf;
	}

	@Override
	public Collection<WebPage.Field> getFields() {
		return FIELDS;
	}

	@Override
	public void injectedScore(String url, WebPage page)
			throws ScoringFilterException {
	}

	@Override
	public void initialScore(String url, WebPage page)
			throws ScoringFilterException {

	}

	@Override
	public float generatorSortValue(String url, WebPage page, float initSort)
			throws ScoringFilterException {
		return initSort;
	}

	@Override
	public void distributeScoreToOutlinks(String fromUrl, WebPage page,
			Collection<ScoreDatum> scoreData, int allCount)
			throws ScoringFilterException {
	}

	@Override
	public void updateScore(String url, WebPage page,
			List<ScoreDatum> inlinkedScoreData) throws ScoringFilterException {
	}

	@Override
	public float indexerScore(String url, NutchDocument doc, WebPage page,
			float initScore) throws ScoringFilterException {
		List<String> tlds = doc.getFieldValues("tld");
		float boost = 1.0f;

		if (tlds != null) {
			for (String tld : tlds) {
				DomainSuffix entry = tldEntries.get(tld);
				if (entry != null)
					boost *= entry.getBoost();
			}
		}
		return initScore * boost;
	}

}</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TLDScoringFilter.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Annotation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Attribute</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method declaration</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method invocation</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre> * limitations under the License.
 */

<<<<<<< HEAD
// $Id$
=======
// $Id: PrefixURLFilter.java 823614 2009-10-09 17:02:32Z ab $
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

package org.apache.nutch.urlfilter.prefix;
</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre> * limitations under the License.
 */

// $Id: PrefixURLFilter.java 823614 2009-10-09 17:02:32Z ab $

package org.apache.nutch.urlfilter.prefix;
</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">PrefixURLFilter.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Comment</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Package declaration</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>    webPageStore.flush();

    Configuration myConfiguration = new Configuration(conf);
<<<<<<< HEAD
    myConfiguration.setInt(Generator.GENERATOR_MAX_COUNT, 1);
    Path generatedSegment = generateFetchlist(Integer.MAX_VALUE,
        myConfiguration, false);
=======
    myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 1);
    myConfiguration.set(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
    generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

    ArrayList&lt;URLWebPage&gt; fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>    webPageStore.flush();

    Configuration myConfiguration = new Configuration(conf);
    myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 1);
    myConfiguration.set(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
    generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);

    ArrayList&lt;URLWebPage&gt; fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestGenerator.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>    assertEquals(1, fetchList.size());

    myConfiguration = new Configuration(conf);
<<<<<<< HEAD
    myConfiguration.setInt(Generator.GENERATOR_MAX_COUNT, 2);
    generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration,
        false);

    fetchlistPath = new Path(new Path(generatedSegment,
        CrawlDatum.GENERATE_DIR_NAME), "part-00000");
=======
    myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 2);
    generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>    assertEquals(1, fetchList.size());

    myConfiguration = new Configuration(conf);
    myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 2);
    generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);

    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestGenerator.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>    assertEquals(2, fetchList.size());

    myConfiguration = new Configuration(conf);
<<<<<<< HEAD
    myConfiguration.setInt(Generator.GENERATOR_MAX_COUNT, 3);
    generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration,
        false);

    fetchlistPath = new Path(new Path(generatedSegment,
        CrawlDatum.GENERATE_DIR_NAME), "part-00000");
=======
    myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 3);
    generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>    assertEquals(2, fetchList.size());

    myConfiguration = new Configuration(conf);
    myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 3);
    generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);

    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestGenerator.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>
  /**
   * Test that generator obeys the property "generator.max.count" and
<<<<<<< HEAD
   * "generator.count.per.domain".
   * @throws Exception 
   */
  public void testGenerateDomainLimit() throws Exception{
    ArrayList&lt;URLCrawlDatum&gt; list = new ArrayList&lt;URLCrawlDatum&gt;();

    list.add(createURLCrawlDatum("http://a.example.com/index.html", 1, 1));
    list.add(createURLCrawlDatum("http://b.example.com/index.html", 1, 1));
    list.add(createURLCrawlDatum("http://c.example.com/index.html", 1, 1));

    createCrawlDB(list);

    Configuration myConfiguration = new Configuration(conf);
    myConfiguration.setInt(Generator.GENERATOR_MAX_COUNT, 1);
    myConfiguration.set(Generator.GENERATOR_COUNT_MODE, Generator.GENERATOR_COUNT_VALUE_DOMAIN);

    Path generatedSegment = generateFetchlist(Integer.MAX_VALUE,
        myConfiguration, false);
=======
   * "generator.count.value=domain".
   *
   * @throws Exception
   */
  public void testGenerateDomainLimit() throws Exception {
    ArrayList&lt;URLWebPage&gt; list = new ArrayList&lt;URLWebPage&gt;();

    list.add(createURLWebPage("http://one.example.com/index.html", 1, 1));
    list.add(createURLWebPage("http://one.example.com/index1.html", 1, 1));
    list.add(createURLWebPage("http://two.example.com/index.html", 1, 1));
    list.add(createURLWebPage("http://two.example.com/index1.html", 1, 1));
    list.add(createURLWebPage("http://three.example.com/index.html", 1, 1));
    list.add(createURLWebPage("http://three.example.com/index1.html", 1, 1));

    for (URLWebPage uwp : list) {
      webPageStore.put(TableUtil.reverseUrl(uwp.getUrl()), uwp.getDatum());
    }
    webPageStore.flush();
    
    Configuration myConfiguration = new Configuration(conf);
    myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 1);
    myConfiguration.set(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_DOMAIN);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

    generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>
  /**
   * Test that generator obeys the property "generator.max.count" and
   * "generator.count.value=domain".
   *
   * @throws Exception
   */
  public void testGenerateDomainLimit() throws Exception {
    ArrayList&lt;URLWebPage&gt; list = new ArrayList&lt;URLWebPage&gt;();

    list.add(createURLWebPage("http://one.example.com/index.html", 1, 1));
    list.add(createURLWebPage("http://one.example.com/index1.html", 1, 1));
    list.add(createURLWebPage("http://two.example.com/index.html", 1, 1));
    list.add(createURLWebPage("http://two.example.com/index1.html", 1, 1));
    list.add(createURLWebPage("http://three.example.com/index.html", 1, 1));
    list.add(createURLWebPage("http://three.example.com/index1.html", 1, 1));

    for (URLWebPage uwp : list) {
      webPageStore.put(TableUtil.reverseUrl(uwp.getUrl()), uwp.getDatum());
    }
    webPageStore.flush();
    
    Configuration myConfiguration = new Configuration(conf);
    myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 1);
    myConfiguration.set(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_DOMAIN);

    generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestGenerator.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Comment</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> For statement</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method signature</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>    assertEquals(1, fetchList.size());

    myConfiguration = new Configuration(myConfiguration);
<<<<<<< HEAD
    myConfiguration.setInt(Generator.GENERATOR_MAX_COUNT, 2);
    generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
=======
    myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 2);
    generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>    assertEquals(1, fetchList.size());

    myConfiguration = new Configuration(myConfiguration);
    myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 2);
    generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);

    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestGenerator.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>
        false);
    assertEquals(2, fetchList.size());

    myConfiguration = new Configuration(myConfiguration);
<<<<<<< HEAD
    myConfiguration.setInt(Generator.GENERATOR_MAX_COUNT, 3);
    generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration,
    fetchlistPath = new Path(new Path(generatedSegment,
        CrawlDatum.GENERATE_DIR_NAME), "part-00000");
=======
    myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 3);
    generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>    assertEquals(2, fetchList.size());

    myConfiguration = new Configuration(myConfiguration);
    myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 3);
    generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);

    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestGenerator.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>  private void generateFetchlist(int numResults, Configuration config,
      boolean filter) throws Exception {
    // generate segment
<<<<<<< HEAD
    Generator g = new Generator(config);
    Path[] generatedSegment = g.generate(dbDir, segmentsDir, -1, numResults,
        Long.MAX_VALUE, filter, false);
    if (generatedSegment==null) return null;
    return generatedSegment[0];
=======
    GeneratorJob g = new GeneratorJob();
    g.setConf(config);
    String crawlId = g.generate(numResults, System.currentTimeMillis(), filter, false);
    if (crawlId == null)
      throw new RuntimeException("Generator failed");
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  }

  /**</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>  private void generateFetchlist(int numResults, Configuration config,
      boolean filter) throws Exception {
    // generate segment
    GeneratorJob g = new GeneratorJob();
    g.setConf(config);
    String crawlId = g.generate(numResults, System.currentTimeMillis(), filter, false);
    if (crawlId == null)
      throw new RuntimeException("Generator failed");
  }

  /**</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestGenerator.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Array access</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> If statement</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Return statement</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>    server.start();
  }

<<<<<<< HEAD
  protected void tearDown() throws Exception{
=======
  public void tearDown() throws Exception{
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
    server.stop();
    fs.delete(testdir, true);
  }</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>    server.start();
  }

  public void tearDown() throws Exception{
    server.stop();
    fs.delete(testdir, true);
  }</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestFetcher.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Method signature</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>    injector.inject(urlPath);

    //generate
<<<<<<< HEAD
    Generator g=new Generator(conf);
    Path[] generatedSegment = g.generate(crawldbPath, segmentsPath, 1,
        Long.MAX_VALUE, Long.MAX_VALUE, false, false);
=======
    long time = System.currentTimeMillis();
    GeneratorJob g = new GeneratorJob(conf);
    String crawlId = g.generate(Long.MAX_VALUE, time, false, false);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

    //fetch
<<<<<<< HEAD</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>    injector.inject(urlPath);

    //generate
    long time = System.currentTimeMillis();
    GeneratorJob g = new GeneratorJob(conf);
    String crawlId = g.generate(Long.MAX_VALUE, time, false, false);

    //fetch</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestFetcher.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

    //fetch
<<<<<<< HEAD
    Fetcher fetcher=new Fetcher(conf);
    fetcher.fetch(generatedSegment[0], 1, true);
=======
    time = System.currentTimeMillis();
    conf.setBoolean(FetcherJob.PARSE_KEY, true);
    FetcherJob fetcher = new FetcherJob(conf);
    fetcher.fetch(1, crawlId, false, true);
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

    time = System.currentTimeMillis() - time;
    </pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>
    //fetch
    time = System.currentTimeMillis();
    conf.setBoolean(FetcherJob.PARSE_KEY, true);
    FetcherJob fetcher = new FetcherJob(conf);
    fetcher.fetch(1, crawlId, false, true);

    time = System.currentTimeMillis() - time;
    </pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestFetcher.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>        conf.getFloat("fetcher.server.delay", 5));
    assertTrue(time > minimumTime);
    
<<<<<<< HEAD
    //verify content
    Path content=new Path(new Path(generatedSegment[0], Content.DIR_NAME),"part-00000/data");
    SequenceFile.Reader reader=new SequenceFile.Reader(fs, content, conf);
    
    ArrayList&lt;String&gt; handledurls=new ArrayList&lt;String&gt;();
    
    READ_CONTENT:
      do {
      Text key=new Text();
      Content value=new Content();
      if(!reader.next(key, value)) break READ_CONTENT;
      String contentString=new String(value.getContent());
      if(contentString.indexOf("Nutch fetcher test page")!=-1) { 
        handledurls.add(key.toString());
=======
    List&lt;URLWebPage&gt; pages = CrawlTestUtil.readContents(webPageStore, Mark.FETCH_MARK, (String[])null);
    assertEquals(urls.size(), pages.size());
    List&lt;String&gt; handledurls = new ArrayList&lt;String&gt;();
    for (URLWebPage up : pages) {
      ByteBuffer bb = up.getDatum().getContent();
      if (bb == null) {
        continue;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
      }
      String content = new String(bb.array());
      if (content.indexOf("Nutch fetcher test page")!=-1) {</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>        conf.getFloat("fetcher.server.delay", 5));
    assertTrue(time > minimumTime);
    
    List&lt;URLWebPage&gt; pages = CrawlTestUtil.readContents(webPageStore, Mark.FETCH_MARK, (String[])null);
    assertEquals(urls.size(), pages.size());
    List&lt;String&gt; handledurls = new ArrayList&lt;String&gt;();
    for (URLWebPage up : pages) {
      ByteBuffer bb = up.getDatum().getContent();
      if (bb == null) {
        continue;
      }
      String content = new String(bb.array());
      if (content.indexOf("Nutch fetcher test page")!=-1) {</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestFetcher.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Comment</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Continue statement</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Do statement</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> For statement</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> If statement</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>    Collections.sort(handledurls);
    //verify that correct pages were handled
    assertTrue(handledurls.containsAll(urls));
    assertTrue(urls.containsAll(handledurls));
<<<<<<< HEAD
    
    handledurls.clear();

    //verify parse data
    Path parseData = new Path(new Path(generatedSegment[0], ParseData.DIR_NAME),"part-00000/data");
    reader = new SequenceFile.Reader(fs, parseData, conf);
    
    READ_PARSE_DATA:
      do {
      Text key = new Text();
      ParseData value = new ParseData();
      if(!reader.next(key, value)) break READ_PARSE_DATA;
      // make sure they all contain "nutch.segment.name" and "nutch.content.digest" 
      // keys in parse metadata
      Metadata contentMeta = value.getContentMeta();
      if (contentMeta.get(Nutch.SEGMENT_NAME_KEY) != null 
            && contentMeta.get(Nutch.SIGNATURE_KEY) != null) {
        handledurls.add(key.toString());
      }
    } while(true);
    

    assertEquals(urls.size(), handledurls.size());

    assertTrue(handledurls.containsAll(urls));
    assertTrue(urls.containsAll(handledurls));
=======
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
  }

  private void addUrl(ArrayList&lt;String&gt; urls, String page) {</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>    //verify that correct pages were handled
    assertTrue(handledurls.containsAll(urls));
    assertTrue(urls.containsAll(handledurls));
  }

  private void addUrl(ArrayList&lt;String&gt; urls, String page) {</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestFetcher.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Comment</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Do statement</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>import org.apache.hadoop.mapred.JobConf;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
<<<<<<< HEAD
=======
import org.apache.nutch.util.NutchJobConf;
>>>>>>> fc6a7f5eaeec70967b5845ee0079b660c3a0de8f

/**
 * Unit tests for the plugin system</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>import org.apache.hadoop.mapred.JobConf;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.NutchJobConf;

/**
 * Unit tests for the plugin system</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestPluginSystem.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Import</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>    public void testRepositoryCache() {
      Configuration config = NutchConfiguration.create();
      PluginRepository repo = PluginRepository.get(config);
&lt;&lt;&lt;&lt;&lt;&lt;&lt; HEAD
      JobConf job = new NutchJob(config);
=======
      JobConf job = new NutchJobConf(config);
&gt;&gt;&gt;&gt;&gt;&gt;&gt; fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
      PluginRepository repo1 = PluginRepository.get(job);
      assertTrue(repo == repo1);
      // now construct a config without UUID</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>    public void testRepositoryCache() {
      Configuration config = NutchConfiguration.create();
      PluginRepository repo = PluginRepository.get(config);
      JobConf job = new NutchJobConf(config);
      PluginRepository repo1 = PluginRepository.get(job);
      assertTrue(repo == repo1);
      // now construct a config without UUID</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestPluginSystem.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            <div class="table-responsive-vertical shadow-z-1">
                <table id="table" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Chunk</th>
                        </tr>
                    </thead>

                </table>
        <div>
<div class="table-responsive-vertical shadow-z-1" style="width: 80%; float: left; position: relative;">
                <!-- Table starts here -->
                <table id="tableConflict" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Conflicting content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

                        <tr>
                            <td data-title="ConflictingContent"><pre>      config.addResource("nutch-default.xml");
      config.addResource("nutch-site.xml");
      repo = PluginRepository.get(config);
&lt;&lt;&lt;&lt;&lt;&lt;&lt; HEAD
      job = new NutchJob(config);
=======
      job = new NutchJobConf(config);
&gt;&gt;&gt;&gt;&gt;&gt;&gt; fc6a7f5eaeec70967b5845ee0079b660c3a0de8f
      repo1 = PluginRepository.get(job);
      assertTrue(repo1 != repo);
    }</pre></td>
                        </tr>

                    </tbody>
                </table>

                <table id="tableSolution" class="table table-hover table-mc-light-blue">
                    <thead>
                        <tr>
                            <th>Solution content</th>
                            
                        </tr>
                    </thead>
                    <tbody>

<tr>
                            <td data-title="SolutionContent"><pre>      config.addResource("nutch-default.xml");
      config.addResource("nutch-site.xml");
      repo = PluginRepository.get(config);
      job = new NutchJobConf(config);
      repo1 = PluginRepository.get(job);
      assertTrue(repo1 != repo);
    }</pre></td>
                        </tr>

                    </tbody>
                </table>
            </div>
        </div>
<div class="table-responsive-vertical shadow-z-1" style="width: 17.5%; float: right; position: relative;">
        
            <table id="tableFileName" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>File</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="FileName">TestPluginSystem.java</td>
                    </tr>
                </tbody>
            </table>
            <table id="tableDeveloperDecision" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Developer's decision</th>                            
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td data-title="DeveloperDecision">Version 2</td>
                    </tr>
                </tbody>
            </table>

            <table id="tableKindConflict" class="table table-hover table-mc-light-blue">
                <thead>
                    <tr>
                        <th>Kind of conflict</th>                            
                    </tr>
                </thead>
                <tbody>

                    <tr>
                        <td data-title="KindConflict">Method invocation</td>
                    </tr>

                    <tr>
                        <td data-title="KindConflict"> Variable</td>
                    </tr>

                </tbody>
            </table>
        </div>
            </div>
        </div>
        
        <!-- Latest compiled and minified JavaScript -->
        <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" ></script>
        <script type="text/javascript" src="script.js"></script>
    </body>
</html>