ycycoco 发表于 2016-12-15 09:16:22

solr build索引性能

测试代码

package mytest;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.impl.BinaryRequestWriter;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.CoreContainer;
public class SolrTest {
public static void test1() throws Exception {
HttpSolrServer solrServer = new HttpSolrServer("http://localhost:8080/solr/dtrace");
solrServer.setRequestWriter(new BinaryRequestWriter());
int size = 100;
long begin = System.currentTimeMillis();
for (int i = 0; i < size; ++i) {
SolrInputDocument doc1 = new SolrInputDocument();
doc1.addField("id", i);
doc1.addField("rowkey", "100 p");
doc1.addField("cf", "[{30:50}]");
doc1.addField("timestamp", System.currentTimeMillis());
solrServer.add(doc1);
}
long end = System.currentTimeMillis();
System.out.println(" add cost:" + (end - begin) + "ms");
begin = System.currentTimeMillis();
solrServer.commit();
end = System.currentTimeMillis();
//System.out.println(" commit " + size + " cost:" + (end - begin) + " ms");
}
static ExecutorService service = Executors.newFixedThreadPool(20);
static CoreContainer container = new CoreContainer("/duitang/data/solr");
static {
container.load();
}
static EmbeddedSolrServer solrServer = new EmbeddedSolrServer(container, "dtrace");
public static void _test2(int round) throws Exception {
int count = 10000;
int size = count * round;
final CountDownLatch latch = new CountDownLatch(count);
//final HttpSolrServer solrServer = new HttpSolrServer("http://localhost:8080/solr/dtrace");
//solrServer.setRequestWriter(new BinaryRequestWriter());
long begin = System.currentTimeMillis();
for (int i = size - count; i < size; ++i) {
final int id = i;
service.submit(new Runnable() {
public void run() {
SolrInputDocument doc1 = new SolrInputDocument();
doc1.addField("id", id);
doc1.addField("rowkey",
"12345678901234567890 12345678901234567890 12345678901234567890 12345678901234567890 12345678901234567890");
doc1.addField(
"cf",
"1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890");
doc1.addField("timestamp", System.currentTimeMillis());
try {
solrServer.add(doc1);
latch.countDown();
} catch (Exception e) {
e.printStackTrace();
}
}
});
}
latch.await();
long end = System.currentTimeMillis();
System.out.println(" add[" + size + "] " + count + " cost:" + (end - begin) + "ms");
//begin = System.currentTimeMillis();
//solrServer.commit(false, false, true);
//solrServer.commit();
//end = System.currentTimeMillis();
//System.out.println(" commit " + size + " cost:" + (end - begin) + "ms");
//service.shutdown();
}
public static void test2() throws Exception {
ExecutorService service = Executors.newFixedThreadPool(1);
int count = 10000;
final CountDownLatch latch = new CountDownLatch(count);
long begin = System.currentTimeMillis();
for (int i = 7342; i <= count; ++i) {
final int index = i;
service.submit(new Runnable() {
public void run() {
try {
_test2(index);
latch.countDown();
} catch (Exception e) {
e.printStackTrace();
}
}
});
}
latch.await();
long end = System.currentTimeMillis();
System.out.println(" add finish " + count + " cost:" + (end - begin) + "ms");
}
public static void test3() throws Exception {
//CoreContainer container = new CoreContainer("/duitang/data/solr");
//container.load();
//EmbeddedSolrServer solrServer = new EmbeddedSolrServer(container, "dtrace");
SolrInputDocument doc1 = new SolrInputDocument();
doc1.addField("id", 1);
doc1.addField("rowkey", "100 p");
doc1.addField("cf", "[{30:50}]");
doc1.addField("timestamp", System.currentTimeMillis());
solrServer.add(doc1);
//solrServer.commit();
System.out.println("ok");
}
public static void close() {
solrServer.shutdown();
}
public static void main(String[] args) throws Exception {
test2();
//close();
//RAMDirectory rdir = new RAMDirectory();
//String fileList[] = rdir.listAll();
//for (int i = 0; i < fileList.length; i++) {
//
//}
//FileSystem fs = FileSystem.get(null);
// fs.startLocalOutput(fsOutputFile, tmpLocalFile)
//FSDirectory.open(path)
//IndexWriter indexWriter = new IndexWriter();
}
}
 每次build数据,1k字符串。
 
 
一.测试add性能
1.单个add
平均cost:4ms
 
2. 顺序执行,add 100个doc:
add cost:564ms
add cost:404ms
add cost:349ms
add cost:393ms
add cost:368ms
 
3. 20个并发,add 100个doc(增加到30,40个并发,性能没有增加):
add cost:74ms
add cost:78ms
add cost:61ms
add cost:60ms
 
4. 20个并发,不同doc数量的耗时
add 1000 cost:367ms
add 5000 cost:812ms
add 10000 cost:1624ms
add 20000 cost:3190ms
add 100000 cost:16874ms
 
二. 测试commit性能
1.等到多少条doc,提交一次的耗时
commit 100   cost:600 ms
commit 1000 cost:1053ms
commit 5000 cost:2130ms
commit 10000 cost:2542ms
commit 20000 cost:3057ms
commit 30000 cost:3036ms
commit 40000 cost:3408ms
commit 100000 cost:4808ms
 
三. 并发commit测试:
当开启5个线程并发commit时候,load上升到10+,会抛出异常:
org.apache.solr.client.solrj.impl.HttpSolrServer$RemoteSolrException: Error opening new searcher. exceeded limit of maxWarmingSearchers=2, try again later.
at org.apache.solr.client.solrj.impl.HttpSolrServer.request(HttpSolrServer.java:491)
at org.apache.solr.client.solrj.impl.HttpSolrServer.request(HttpSolrServer.java:197)
at org.apache.solr.client.solrj.request.AbstractUpdateRequest.process(AbstractUpdateRequest.java:117)
at org.apache.solr.client.solrj.SolrServer.commit(SolrServer.java:168)
at org.apache.solr.client.solrj.SolrServer.commit(SolrServer.java:146)
at mytest.SolrTest.test2(SolrTest.java:74)
at mytest.SolrTest$2.run(SolrTest.java:88)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:441)
at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303)
at java.util.concurrent.FutureTask.run(FutureTask.java:138)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)
 
 
三. 网络连接
测试发现solr不会开启长连接。
tcp6       0      0 127.0.0.1:8080          127.0.0.1:46166         TIME_WAIT   -               
tcp6       0      0 127.0.0.1:8080          127.0.0.1:46197         TIME_WAIT   -               
tcp6       0      0 127.0.0.1:8080          127.0.0.1:46243         TIME_WAIT   -               
tcp6       0    183 127.0.0.1:8080          127.0.0.1:46302         ESTABLISHED 20112/java      
tcp6       0      0 127.0.0.1:8080          127.0.0.1:46173         TIME_WAIT   -               
tcp6       0      0 127.0.0.1:8080          127.0.0.1:46228         TIME_WAIT   -               
tcp6       0      0 127.0.0.1:8080          127.0.0.1:46230         TIME_WAIT   -               
tcp6       0      0 127.0.0.1:8080          127.0.0.1:46278         TIME_WAIT   -               
tcp6       0    187 127.0.0.1:8080          127.0.0.1:46298         ESTABLISHED 20112/java      
tcp6       0      0 127.0.0.1:8080          127.0.0.1:46158         TIME_WAIT   -               
tcp6       0      0 127.0.0.1:46299         127.0.0.1:8080          ESTABLISHED 3321/java       
tcp6       0      0 127.0.0.1:8080          127.0.0.1:46137         TIME_WAIT   -               
tcp6       0      0 127.0.0.1:8080          127.0.0.1:46160         TIME_WAIT   -               
tcp6       0      0 127.0.0.1:8080          127.0.0.1:46134         TIME_WAIT   -               
tcp6       0    187 127.0.0.1:8080          127.0.0.1:46288         ESTABLISHED 20112/java      
yunpeng@yunpeng-duitang:/duitang/data/solr/dtrace$ netstat -antp | grep 8080 | wc -l
331
yunpeng@yunpeng-duitang:/duitang/data/solr/dtrace$ netstat -antp | grep 8080 | wc -l
411
yunpeng@yunpeng-duitang:/duitang/data/solr/dtrace$ netstat -antp | grep 8080 | wc -l
660
yunpeng@yunpeng-duitang:/duitang/data/solr$ netstat -antp | grep 8080 | wc -l
10018
 
 
四. 索引数据量
插入30万条数据,单条数据1k,占用磁盘空间:9.5M,30万条1K数据实际应占用300MB,可见Lucene的索引是经过压缩的。大概是30:1。 (结论不正确,估计与测试使用了大量重复数据有关)
 
五.测试solr各种commit不同配置下的性能。
  1.配置1

<updateHandler class="solr.DirectUpdateHandler2">
<autoCommit>
<maxDocs>10000</maxDocs>
<maxTime>1000</maxTime>
<openSearcher>false</openSearcher>
</autoCommit>
<updateLog>
<str name="dir">${solr.shard.data.dir:}</str>
</updateLog>
</updateHandler>
  结果:
  1.无法查询到结果。
  2.yunpeng@yunpeng-duitang:/duitang/data/solr/dtrace$ du -sh data/*
  2.9M data/index
  39M data/tlog
  3. add 5000 cost:2405ms
  2. 配置2:

<updateHandler class="solr.DirectUpdateHandler2">
<autoCommit>
<maxDocs>10000</maxDocs>
<maxTime>1000</maxTime>
<openSearcher>true</openSearcher>
</autoCommit>
<updateLog>
<str name="dir">${solr.shard.data.dir:}</str>
</updateLog>
</updateHandler>
  结果:
  1.可以查询到结果。
  2.yunpeng@yunpeng-duitang:/duitang/data/solr/dtrace$ du -sh data/*
  1.7M data/index
  27M data/tlog
  3. add5000 cost:2726ms
  3. 配置3:

<updateHandler class="solr.DirectUpdateHandler2">
<autoCommit>
<maxTime>2000</maxTime>
<openSearcher>false</openSearcher>
</autoCommit>
<autoSoftCommit>
<maxTime>10000</maxTime>
</autoSoftCommit>
<updateLog>
<str name="dir">${solr.shard.data.dir:}</str>
</updateLog>
</updateHandler>
  结果:
  1.可以查询到结果。
  2.yunpeng@yunpeng-duitang:/duitang/data/solr/dtrace$ du -sh data/*
  2.0M data/index
  31M data/tlog
  3. add 5000 cost:1264ms
六.结论
1. 通过http api solr单机build索引最大QPS 5k。
页: [1]
查看完整版本: solr build索引性能