suncool 发表于 2017-12-20 09:48:21

Solr 6.6 导入文本（txt/json/xml/csv）文件：DataImportHandler 的 data-config.xml 配置示例

<dataConfig>
  <!--
    Solr DataImportHandler configuration: walks a directory tree, selects
    plain-text style files (json/txt/csv/xml) and indexes each file's full
    content into the "text" field together with basic file metadata.
  -->

  <!-- Character data source used by the PlainTextEntityProcessor entity below.
       NOTE(review): no encoding attribute is set, so the platform default
       charset is presumably used; confirm that it matches the input files. -->
  <dataSource name="fileDataSource" type="FileDataSource" />

  <!-- Disabled example: extract the text and metadata of a single PDF via Tika.
  <document>
    <entity name="tika-test" processor="TikaEntityProcessor"
            url="C:/docs/solr-word.pdf" format="text">
      <field column="Author" name="author" meta="true"/>
      <field column="title" name="title" meta="true"/>
      <field column="text" name="text"/>
    </entity>
  </document>
  -->

  <!-- Binary data source; not referenced by any active entity in this file. -->
  <dataSource name="urlDataSource" type="BinURLDataSource" />

  <!-- Alternative file filter for office documents:
       baseDir="D:/work/Solr/solr-6.6.0/ImportDoc" fileName=".*\.(doc|pdf|docx|txt)$" -->

  <document>
    <!--
      Outer entity: lists the files under baseDir (recursively).
      * dataSource="null": this entity does not reference a named data source.
      * rootEntity="false": documents are created per row of the nested entity.
      * fileName: the alternatives are grouped and the pattern is anchored at
        the end of the name, so only names ending in .json, .txt, .csv or .xml
        are selected. Without the grouping, "|" splits the whole expression and
        any name merely containing "txt", "csv" or "xml" would be accepted.
    -->
    <entity name="files"
            processor="FileListEntityProcessor"
            dataSource="null"
            rootEntity="false"
            baseDir="D:/work/Solr/solr-6.6.0/ImportDoc"
            fileName=".*\.(json|txt|csv|xml)$"
            recursive="true"
            onError="skip">

      <!-- File metadata columns mapped to index fields. -->
      <field column="file" name="id"/>
      <field column="fileAbsolutePath" name="filePath"/>
      <field column="fileSize" name="size"/>
      <field column="fileLastModified" name="lastModified"/>

      <!-- Inner entity: reads the file found by the outer entity as plain text
           and maps the "plainText" column to the "text" field. -->
      <entity name="txtfile"
              processor="PlainTextEntityProcessor"
              dataSource="fileDataSource"
              url="${files.fileAbsolutePath}">
        <field column="plainText" name="text"/>
      </entity>
    </entity>
  </document>
</dataConfig>
页: [1]
查看完整版本: solr6.6 导入 文本(txt/json/xml/csv)文件