<?xml version="1.0" encoding="UTF-8"?>
<!--
Document : configMyPipeline.xml
Description: An example Pipeline configuration file
-->
<pipeline>
<driverFactory className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="df0"/>
<!-- The <env> element can be used to add global environment variable values to the pipeline.
In this instance almost all of the stages need a key to tell them what type of data
to process.
-->
<env>
<value key="dataType">STLD</value>
</env>
<!-- The initial stage traverses a directory so that it can feed the filenames of
the files to be processed to the subsequent stages.
The directory path to be traversed is in the feed block following this stage.
The filePattern in the stage block is the pattern to look for within that directory.
-->
<stage className="org.apache.commons.pipeline.stage.FileFinderStage"
driverFactoryId="df0"
filePattern="SALES\.(ASWK|ST(GD|GL|LD))\.N.?\.D\d{5}"/>
<feed>
<value>/mnt/data2/gdsg/sst/npr</value>
</feed>
<stage className="gov.noaa.eds.example.Stage2"
driverFactoryId="df0" />
<!-- Write the data from the previous stage into the Rich Inventory database. -->
<stage className="gov.noaa.eds.sst2ri.SstWriterRI"
driverFactoryId="df0"/>
</pipeline>
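The Stage2 entry above refers to an application-specific stage class. As a rough sketch (not the actual source of gov.noaa.eds.example.Stage2), a custom stage for commons-pipeline typically extends BaseStage, does its work in process(), and hands its results to the next stage with emit(); the exact base-class API may vary between releases:

import org.apache.commons.pipeline.StageException;
import org.apache.commons.pipeline.stage.BaseStage;

public class ExampleStage extends BaseStage {
    // Called once for each object emitted by the preceding stage.
    public void process(Object obj) throws StageException {
        // obj is whatever the upstream stage emitted (for FileFinderStage,
        // a reference to a matching file).
        // ... application-specific processing would go here ...
        this.emit(obj); // pass the (possibly transformed) object downstream
    }
}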
Below is a summary of the example above:
<?xml version="1.0" encoding="UTF-8"?>
<!--
Document : configSimplePipeline.xml
Description: A sample configuration file for a very simple pipeline
-->
<pipeline>
<driverFactory className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="driverFactory"/>
<!--
((1)) The first stage recursively searches the directory given in the feed statement.
The filePattern given will match any files beginning with "HelloWorld".
-->
<stage className="org.apache.commons.pipeline.stage.FileFinderStage"
driverFactoryId="driverFactory"
filePattern="HelloWorld.*"/><!-- ((3)) -->
<!-- Starting directory for the first stage. -->
<feed>
<value>/data/sample</value> <!-- ((4)) -->
</feed>
<!-- ((2)) Report the files found. -->
<stage className="org.apache.commons.pipeline.stage.LogStage"
driverFactoryId="driverFactory" />
</pipeline>
A single driver factory serves both stages: its id is "driverFactory", and that value is referenced by both stage elements.
In theory a pipeline could contain just one stage, but that degenerate case is no different from an ordinary program, except that it can easily be extended to multiple stages.
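For completeness, here is a sketch of how a pipeline defined by such a configuration file is typically created and run from Java. It assumes the Digester-based factory bundled with commons-pipeline (DigesterPipelineFactory); the exact constructor and exceptions may differ between releases:

import java.io.File;
import java.net.URL;
import org.apache.commons.pipeline.Pipeline;
import org.apache.commons.pipeline.config.DigesterPipelineFactory;

public class RunPipeline {
    public static void main(String[] args) throws Exception {
        // Point the factory at the configuration file shown above.
        URL confUrl = new File("configSimplePipeline.xml").toURI().toURL();
        DigesterPipelineFactory factory = new DigesterPipelineFactory(confUrl);
        Pipeline pipeline = factory.createPipeline();
        pipeline.run(); // returns once every stage has finished processing
    }
}

With the DedicatedThreadStageDriverFactory used above, each stage is driven in its own thread; the driverFactoryId on a stage simply selects which driver factory configuration that stage uses.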
Example 3:
The color-annotated configuration file is as follows:
<?xml version="1.0" encoding="UTF-8"?>
<!--
Document : branchingPipeline.xml
Description: Configuration file for a pipeline that takes
user-provided files as input and, from those files, both generates HTML files
and puts data into a database.
-->
<pipeline>
<driverFactory className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="df0"/>
<!--
The <env> element can be used to add global environment variable values to the pipeline.
In this instance almost all of the stages need a key to tell them what type of data
to process.
-->
<env>
<value key="division">West</value> <!-- ((9)) -->
</env>
<!--
((1)) The initial stage traverses a directory so that it can feed the filenames of
the files to be processed to the subsequent stages.
The directory path to be traversed is in the feed block following this stage.
The filePattern in the stage block is the pattern to look for within that directory.
-->
<stage className="org.apache.commons.pipeline.stage.FileFinderStage"
driverFactoryId="df0"
filePattern="SALES\.(ASWK|ST(GD|GL|LD))\.N.?\.D\d{5}"/><!-- ((8)) --><feed>
<value>/data/INPUT/raw</value> <!-- ((7)), ((11)) -->
</feed>
<!--
((2)) This stage is going to select a subset of the files from the previous stage
and orders them for time sequential processing using the date embedded in
the last several characters of the file name.
The filesToProcess attribute is the number of files to emit to the next stage before
terminating processing. Zero (0) has the special meaning that ALL available
files should be processed.
-->
<stage className="com.demo.pipeline.stages.FileSorterStage"
driverFactoryId="df1"
filesToProcess="0"/>
<!--
((3)) Read the files and create the objects to be passed to the stage that writes
to the database and to the stage that writes the data to HTML files.
WARNING: The value for htmlPipelineKey in the stage declaration here
must exactly match the branch pipeline key further down in this file.
-->
<stage className="com.demo.pipeline.stages.FileReaderStage"
driverFactoryId="df1"
htmlPipelineKey="sales2html"/>
<!--
((4)) Write the data from the FileReaderStage stage into the database.
-->
<stage className="com.demo.pipeline.stages.DatabaseWriterStage"
driverFactoryId="df1">
<datasource user="test"
password="abc123"
type="oracle"
host="brain.demo.com"
port="1521"
database="SALES" />
<database-proxy className="gov.noaa.gdsg.sql.oracle.OracleDatabaseProxy" />
<tablePath path="summary.inventory" /> <!-- ((13)) -->
</stage>
<!--
Write the data from the FileReaderStage stage to HTML files.
The outputFilePath is the path to which we will be writing our summary HTML files.
WARNING: The value for the branch pipeline key declaration here must
exactly match the htmlPipelineKey in the FileReaderStage stage in this file.
-->
<branch>
<pipeline key="sales2html"> <!-- ((10)) --><env>
<value key="division">West</value> <!-- ((14)) -->
</env>
<driverFactory className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="df2">
<property propName="queueFactory"
className="org.apache.commons.pipeline.util.BlockingQueueFactory$ArrayBlockingQueueFactory"
capacity="4" fair="false"/>
</driverFactory>