liuyuehua 发表于 2016-12-30 07:55:41

Apache module杂记

可以基于正则表达式修改文本内容的apache module:
mod_sed:实现了类似sed功能的module,可以通过正则表达式修改文本内容。apache2.3中加入了这个module,但是这个module也可以用于apache 2.0版本。可以到http://src.opensolaris.org/source/xref/webstack/mod_sed/下载源代码,readme里有相应的编译命令:/http安装路径/bin/apxs -i -c mod_sed.c regexp.c sed0.c sed1.c
mod_substitute:功能和mod_sed类似,默认已加入到apache2.2中:http://httpd.apache.org/docs/2.2/mod/mod_substitute.html
mod_line_edit:也可以基于正则表达式替换文本内容,可以修改html/css/javascript。但是它和前两者不同的是,mod_line_edit的to-pattern可以使用apache的环境变量(http://apache.webthing.com/mod_line_edit/),这个功能正是我最近需要的。
例如下面的配置可以在<head>标签后插入一个<meta/>标签,并且可以将环境变量UNIQUE_ID的值添加到meta的属性中去(UNIQUE_ID环境变量需要mod_unique_id的支持:http://lamp.linux.gov.cn/apache/apachemenu/mod/mod_unique_id.html):
LERewriteRule "<head>" "<head><meta http-equiv='request-id' content='${UNIQUE_ID}' />" iV
mod_proxy_html:可以基于html标签进行比较精细的内容修改操作:http://apache.webthing.com/mod_proxy_html/
以上这些module都是基于apache的过滤器功能来完成对响应内容的修改(类似servlet里的filter):http://lamp.linux.gov.cn/apache/apachemenu/filter.html
http://lamp.linux.gov.cn/apache/apachemenu/images/filter_arch.gif
mod_line_edit使用经验:
1.mod_line_edit的性能问题:考虑到系统中有500-700KB的html页面,因此我对mod_line_edit在输出1MB静态html时的表现做了一下压力测试。测试结果非常不理想:在不添加mod_line_edit时TPS在500以上,加上mod_line_edit后TPS只有1(狂汗...)。
2.mod_line_edit的代码分析:
/* mod_line_edit顾名思义就是对文本内容按行进行编辑,   * 因此mod要先对输出流进行整理,每一行内容收集到一个apr_bucket里,   * 然后将所有行数据放到bbline中,下面的代码实现的就是这个用途。   */bbline = apr_brigade_create(f->r->pool, f->c->bucket_alloc) ;/* first ensure we have no mid-line breaks that might be in the   * middle of a search string causing us to miss it!at the same   * time we split into lines to avoid pattern-matching over big   * chunks of memory.   */while ( b != apr_brigade_sentinel(bb) ) {    if ( !apr_bucket_is_metadata(b) ) {      if ( apr_bucket_read(b, &amp;buf, &amp;bytes, apr_block_read) == apr_success ) {if ( bytes == 0 ) {apr_bucket_remove(b) ;} else while ( bytes > 0 ) {switch (cfg->lineend) {case lineend_unix:    le = memchr(buf, '\n', bytes) ;    break ;case lineend_mac:    le = memchr(buf, '\r', bytes) ;    break ;case lineend_dos:    /* edge-case issue: if a \r\n spans buckets it'll get missed.   * not a problem for present purposes, but would be an issue   * if we claimed to support pattern matching on the lineends.   */    found = 0 ;    le = memchr(buf+1, '\n', bytes-1) ;    while ( le &amp;&amp; !found ) {      if ( le[-1] == '\r' ) {      found = 1 ;      } else {      le = memchr(le+1, '\n', bytes-1 - (le+1 - buf)) ;      }    }    if ( !found )      le = 0 ;    break;case lineend_any:case lineend_unset:    /* edge-case notabug: if a \r\n spans buckets it'll get seen as   * two line-ends.it'll insert the \n as a one-byte bucket.   */    le_n = memchr(buf, '\n', bytes) ;    le_r = memchr(buf, '\r', bytes) ;    if ( le_n != null )      if ( le_n == le_r + sizeof(char))      le = le_n ;      else if ( (le_r < le_n) &amp;&amp; (le_r != null) )      le = le_r ;      else      le = le_n ;    else      le = le_r ;    break;case lineend_none:    le = 0 ;    break;case lineend_custom:    le = memchr(buf, cfg->lechar, bytes) ;    break;}if ( le ) {    /* found a lineend in this bucket. 
*/    offs = 1 + ((unsigned int)le-(unsigned int)buf) / sizeof(char) ;    apr_bucket_split(b, offs) ;    bytes -= offs ;    buf += offs ;    b1 = apr_bucket_next(b) ;    apr_bucket_remove(b);    /* is there any previous unterminated content ? */    if ( !apr_brigade_empty(ctx->bbsave) ) {      /* append this to any content waiting for a lineend */      apr_brigade_insert_tail(ctx->bbsave, b) ;      rv = apr_brigade_pflatten(ctx->bbsave, &amp;fbuf, &amp;fbytes, f->r->pool) ;      /* make b a new bucket of the flattened stuff */      b = apr_bucket_pool_create(fbuf, fbytes, f->r->pool,f->r->connection->bucket_alloc) ;      /* bbsave has been consumed, so clear it */      apr_brigade_cleanup(ctx->bbsave) ;    }    /* b now contains exactly one line */    apr_brigade_insert_tail(bbline, b);    b = b1 ;} else {    /* no lineend found.remember the dangling content */    apr_bucket_remove(b);    apr_brigade_insert_tail(ctx->bbsave, b);    bytes = 0 ;}} /* while bytes > 0 */      } else {/* bucket read failed - oops !let's remove it. */apr_bucket_remove(b);      }    } else if ( apr_bucket_is_eos(b) ) {      /* if there's data to pass, send it in one bucket */      if ( !apr_brigade_empty(ctx->bbsave) ) {      rv = apr_brigade_pflatten(ctx->bbsave, &amp;fbuf, &amp;fbytes, f->r->pool) ;      b1 = apr_bucket_pool_create(fbuf, fbytes, f->r->pool,f->r->connection->bucket_alloc) ;      apr_brigade_insert_tail(bbline, b1);      }      apr_brigade_cleanup(ctx->bbsave) ;      /* start again rather than segfault if a seriously buggy       * filter in front of us sent a bogus eos       */      f->ctx = null ;      /* move the eos to the new brigade */      apr_bucket_remove(b);      apr_brigade_insert_tail(bbline, b);    } else {      /* chop flush or unknown metadata bucket types */      apr_bucket_delete(b);    }    /* ok, reset pointer to what's left (since we're not in a for-loop) */    b = apr_brigade_first(bb) ;}
/* 这里就是循环使用配置的规则处理整理好的每行apr_bucket   */for (i = 0; i < ctx->rewriterules->nelts; ++i) {    for ( b = apr_brigade_first(bbline) ;b != apr_brigade_sentinel(bbline) ;b = apr_bucket_next(b) ) {      if ( !apr_bucket_is_metadata(b)&amp;&amp; (apr_bucket_read(b, &amp;buf, &amp;bytes, apr_block_read) == apr_success)) {if ( rules.flags &amp; m_regex ) {bufp = apr_pstrmemdup(ctx->lpool, buf, bytes) ;while ( ! ap_regexec(rules.from.r, bufp, nmatch, pmatch, 0) ) {    match = pmatch.rm_so ;    subs = ap_pregsub(f->r->pool, rules.to, bufp, nmatch, pmatch) ;    apr_bucket_split(b, match) ;    b1 = apr_bucket_next(b) ;    apr_bucket_split(b1, pmatch.rm_eo - match) ;    b = apr_bucket_next(b1) ;    apr_bucket_delete(b1) ;    b1 = apr_bucket_pool_create(subs, strlen(subs), f->r->pool,f->r->connection->bucket_alloc) ;    apr_bucket_insert_before(b, b1) ;    bufp += pmatch.rm_eo ;}} else {bufp = buf ;while (subs = apr_strmatch(rules.from.s, bufp, bytes),subs != null) {    match = ((unsigned int)subs - (unsigned int)bufp) / sizeof(char) ;    bytes -= match ;    bufp += match ;    apr_bucket_split(b, match) ;    b1 = apr_bucket_next(b) ;    apr_bucket_split(b1, rules.length) ;    b = apr_bucket_next(b1) ;    apr_bucket_delete(b1) ;    bytes -= rules.length ;    bufp += rules.length ;    b1 = apr_bucket_immortal_create(rules.to, strlen(rules.to),f->r->connection->bucket_alloc) ;    apr_bucket_insert_before(b, b1) ;}}      }    }    /* if we used a local pool, clear it now */    if ( (ctx->lpool != f->r->pool) &amp;&amp; (rules.flags &amp; m_regex) ) {      apr_pool_clear(ctx->lpool) ;    }}
正因为这个filter会对整个输出流进行遍历、整理、拷贝,然后再将整理好的流按行进行处理,所以在输出大文本时势必会影响性能。
3.mod_line_edit的优化:由于我对mod_line_edit的需求比较简单,只是在<head>标签后面追加一些内容,所以没有必要对整个输出流进行遍历。只要处理完输出流中的<head>标签,就可以结束对输出流的处理,直接调用return ap_pass_brigade(f->next, bb) ;将流传递给下一个filter即可。
页: [1]
查看完整版本: Apache module杂记