/**
 * Runs the indexing MapReduce job and then issues a single commit against
 * the Solr server at {@code solrUrl}.
 *
 * <p>The job writes its output to a randomly named temporary path, which is
 * deleted recursively when the method exits, whether or not indexing
 * succeeded.
 *
 * @param solrUrl  URL of the Solr server; stored in the job configuration
 *                 under {@code SolrConstants.SERVER_URL} and used for the
 *                 final commit
 * @param crawlDb  crawl database path handed to {@code IndexerMapReduce.initMRJob}
 * @param linkDb   link database path handed to {@code IndexerMapReduce.initMRJob}
 * @param segments segments handed to {@code IndexerMapReduce.initMRJob}
 *                 (presumably a list of segment paths -- confirm against callers)
 * @throws IOException if the job, the Solr commit, or the temporary-output
 *                     cleanup fails; non-I/O checked failures are wrapped
 *                     with the original exception as the cause
 */
public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
    List segments) throws IOException {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("SolrIndexer: starting at " + sdf.format(start));

  final JobConf job = new NutchJob(getConf());
  job.setJobName("index-solr " + solrUrl);

  IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

  job.set(SolrConstants.SERVER_URL, solrUrl);

  NutchIndexWriterFactory.addClassToConf(job, SolrWriter.class);

  // NOTE(review): presumably disabled so that duplicate speculative reduce
  // attempts do not push the same documents to Solr twice -- confirm.
  job.setReduceSpeculativeExecution(false);

  final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" +
      new Random().nextInt());

  FileOutputFormat.setOutputPath(job, tmp);
  try {
    JobClient.runJob(job);
    // do the commits once and for all the reducers in one go
    SolrServer solr = new CommonsHttpSolrServer(solrUrl);
    solr.commit();
    long end = System.currentTimeMillis();
    LOG.info("SolrIndexer: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
  } catch (Exception e) {
    // Log with the throwable so the stack trace is kept, then propagate:
    // silently returning here would make a failed indexing run look
    // successful to the caller.
    LOG.error("SolrIndexer: indexing to " + solrUrl + " failed", e);
    if (e instanceof IOException) {
      throw (IOException) e;
    }
    if (e instanceof RuntimeException) {
      throw (RuntimeException) e;
    }
    throw new IOException("SolrIndexer: indexing to " + solrUrl + " failed", e);
  } finally {
    // The job output is only a by-product; always remove it.
    FileSystem.get(job).delete(tmp, true);
  }
}
|