solr dataimport 数据导入源码分析补充

lichaoyue888 · 发表于 2015-7-17 12:16:17

　　上一部分的代码还可以进一步优化：主要思路是先把文档构建到一个 Collection 集合中，再分批次提交，从而提高新增索引的速度。
　　其实分页方式本质上也是分批次提交，不过下面这种方式更优雅。
　　参考如下代码：

import java.io.IOException;
import java.net.MalformedURLException;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Types;
import java.util.ArrayList;
import java.util.Collection;

import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.common.SolrInputDocument;

/**
 * Copies the rows of a JDBC {@link ResultSet} into a Solr core, sending the
 * documents in batches instead of one request per row.
 */
public class Test
{
   /** Number of documents buffered before a batch is sent to Solr. */
   private static final int FETCH_SIZE = 1000;

   /** Base URL of the Solr core that receives the documents. */
   private static final String URL = "http://localhost:8983/solr/core1/";

   /** Client used for every add request issued by this instance. */
   private final CommonsHttpSolrServer solrCore;

   /**
    * Creates the importer and its Solr client for the configured core URL.
    *
    * @throws MalformedURLException
    *            if the configured URL is rejected by the Solr client.
    */
   public Test() throws MalformedURLException
   {
      solrCore = new CommonsHttpSolrServer(URL);
   }

   /**
    * Takes an SQL ResultSet and adds the documents to solr. Does it in
    * batches of {@code FETCH_SIZE}. Each row becomes one document whose field
    * names are the column names reported by the result set metadata. This
    * method only adds documents; it does not issue a commit.
    *
    * @param rs
    *           A ResultSet from the database, positioned before the first
    *           row to import.
    * @return The number of documents added to solr.
    * @throws SQLException
    *            if reading the metadata or a row fails.
    * @throws SolrServerException
    *            if Solr rejects a batch.
    * @throws IOException
    *            if communication with Solr fails.
    */
   public long addResultSet(ResultSet rs) throws SQLException,
         SolrServerException, IOException
   {
      ResultSetMetaData rsm = rs.getMetaData();
      int numColumns = rsm.getColumnCount();

      /*
       * JDBC numbers the columns starting at 1, so both arrays are sized
       * numColumns + 1 and index 0 is left unused.
       */
      String[] colNames = new String[numColumns + 1];
      int[] colTypes = new int[numColumns + 1];
      for (int i = 1; i <= numColumns; i++)
      {
         colNames[i] = rsm.getColumnName(i);
         // The column type never changes between rows, so look it up once.
         colTypes[i] = rsm.getColumnType(i);

         /*
          * If there are fields that you want to handle manually, check for
          * them here and change that entry in colNames to null. This will
          * cause the per-row loop below to skip that database column.
          */
         // Example:
         // if ("db_id".equals(colNames[i]))
         // {
         //    colNames[i] = null;
         // }
      }

      long count = 0;
      Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();

      while (rs.next())
      {
         count++;
         SolrInputDocument doc = new SolrInputDocument();

         /*
          * At this point, take care of manual document field assignments
          * for which you previously set the colNames entry to null.
          */
         // Example:
         // doc.addField("solr_db_id", rs.getLong("db_id"));

         for (int j = 1; j <= numColumns; j++)
         {
            if (colNames[j] != null)
            {
               doc.addField(colNames[j], readColumn(rs, colTypes[j], j));
            }
         }
         docs.add(doc);

         // A full batch has accumulated: send it and start a new one.
         if (docs.size() >= FETCH_SIZE)
         {
            solrCore.add(docs);
            docs.clear();
         }
      }

      // Send whatever is left over from the last, partially filled batch.
      if (!docs.isEmpty())
      {
         solrCore.add(docs);
      }
      return count;
   }

   /**
    * Reads one column of the current row using the getter that matches its
    * SQL type; any type not listed is read as a String.
    *
    * @param rs
    *           the result set, positioned on the row to read.
    * @param sqlType
    *           the column's type code from {@link Types}.
    * @param index
    *           the 1-based column index.
    * @return the column value, or {@code null} when the column is SQL NULL.
    * @throws SQLException
    *            if the driver cannot read the column.
    */
   private static Object readColumn(ResultSet rs, int sqlType, int index)
         throws SQLException
   {
      Object value;
      switch (sqlType)
      {
         case Types.BIGINT:
            value = rs.getLong(index);
            break;
         case Types.INTEGER:
            value = rs.getInt(index);
            break;
         case Types.DATE:
            value = rs.getDate(index);
            break;
         case Types.FLOAT:
            value = rs.getFloat(index);
            break;
         case Types.DOUBLE:
            value = rs.getDouble(index);
            break;
         case Types.TIME:
            // TIME columns are read with getTime, not getDate.
            value = rs.getTime(index);
            break;
         case Types.BOOLEAN:
            value = rs.getBoolean(index);
            break;
         default:
            value = rs.getString(index);
            break;
      }
      /*
       * The primitive getters return 0 / false for SQL NULL; wasNull()
       * reports whether the column just read was NULL, so a NULL is not
       * turned into a real zero. The object getters already return null.
       */
      return rs.wasNull() ? null : value;
   }
}

账号		自动登录	找回密码
密码			立即注册

最新rhel8官方手册三本PDF

winhex数据恢复教程（非常巨大，内容丰富）

KMSpico10.2.0 免费激活Win10/Office2016（

zabbix3.4中文手册，官网完整COPY（2019042

zabbix3.4.1安装部署+微信推送信息+大屏显

VMware vcenter+vSphere 6.5 U2共享

CentOS6.5下redis-3.2.6的安装与配置

[经验分享] solr dataimport 数据导入源码分析补充

浏览过的版块

扫码加入运维网微信交流群

[经验分享] solr dataimport 数据导入源码分析 补充

浏览过的版块

[经验分享] solr dataimport 数据导入源码分析补充