XML Processing

Library

Many useful steps can be implemented directly in XProc. This page identifies a few such steps.

Recursive directory list

The p:directory-list step will return the contents of a single directory. The l:recursive-directory-list step will process a directory and its subdirectories recursively.

<p:declare-step xmlns:p="http://www.w3.org/ns/xproc"
                xmlns:c="http://www.w3.org/ns/xproc-step"
                xmlns:l="http://xproc.org/library"
                type="l:recursive-directory-list"
                version="1.0">
  <!-- Lists the directory named by the path option with p:directory-list and
       then replaces every c:directory child of that listing with the result
       of running this same step on the corresponding subdirectory.

       Options:
         path            directory to list (required)
         include-filter  forwarded to p:directory-list only when it has a value
         exclude-filter  forwarded to p:directory-list only when it has a value
         depth           decremented by one on every recursive call; recursion
                         stops when it equals 0. The default of -1 never
                         reaches 0, so by default the depth is not limited. -->
  <p:output port="result"/>
  <p:option name="path" required="true"/>
  <p:option name="include-filter"/>
  <p:option name="exclude-filter"/>
  <p:option name="depth" select="-1"/>

  <!-- Step 1: list this directory. An option without a value must not be
       passed on, so each combination of filters gets its own branch. The
       tests are mutually exclusive as written. -->
  <p:choose>
    <p:when test="not(p:value-available('include-filter'))
                  and not(p:value-available('exclude-filter'))">
      <p:directory-list>
        <p:with-option name="path" select="$path"/>
      </p:directory-list>
    </p:when>

    <p:when test="not(p:value-available('exclude-filter'))">
      <!-- only include-filter has a value -->
      <p:directory-list>
        <p:with-option name="include-filter" select="$include-filter"/>
        <p:with-option name="path" select="$path"/>
      </p:directory-list>
    </p:when>

    <p:when test="not(p:value-available('include-filter'))">
      <!-- only exclude-filter has a value -->
      <p:directory-list>
        <p:with-option name="exclude-filter" select="$exclude-filter"/>
        <p:with-option name="path" select="$path"/>
      </p:directory-list>
    </p:when>

    <p:otherwise>
      <!-- both filters have a value -->
      <p:directory-list>
        <p:with-option name="exclude-filter" select="$exclude-filter"/>
        <p:with-option name="include-filter" select="$include-filter"/>
        <p:with-option name="path" select="$path"/>
      </p:directory-list>
    </p:otherwise>
  </p:choose>

  <!-- Step 2: expand each immediate subdirectory entry in place. -->
  <p:viewport match="/c:directory/c:directory">
    <!-- Path of the subdirectory currently being processed by the viewport. -->
    <p:variable name="subdir" select="concat($path,'/',/*/@name)"/>

    <p:choose>
      <p:when test="$depth = 0">
        <!-- Depth budget used up: keep the c:directory entry unexpanded. -->
        <p:identity/>
      </p:when>
      <p:otherwise>
        <p:choose>
          <p:when test="not(p:value-available('include-filter'))
                        and not(p:value-available('exclude-filter'))">
            <l:recursive-directory-list>
              <p:with-option name="path" select="$subdir"/>
              <p:with-option name="depth" select="$depth - 1"/>
            </l:recursive-directory-list>
          </p:when>

          <p:when test="not(p:value-available('exclude-filter'))">
            <l:recursive-directory-list>
              <p:with-option name="path" select="$subdir"/>
              <p:with-option name="depth" select="$depth - 1"/>
              <p:with-option name="include-filter" select="$include-filter"/>
            </l:recursive-directory-list>
          </p:when>

          <p:when test="not(p:value-available('include-filter'))">
            <l:recursive-directory-list>
              <p:with-option name="path" select="$subdir"/>
              <p:with-option name="depth" select="$depth - 1"/>
              <p:with-option name="exclude-filter" select="$exclude-filter"/>
            </l:recursive-directory-list>
          </p:when>

          <p:otherwise>
            <l:recursive-directory-list>
              <p:with-option name="path" select="$subdir"/>
              <p:with-option name="depth" select="$depth - 1"/>
              <p:with-option name="include-filter" select="$include-filter"/>
              <p:with-option name="exclude-filter" select="$exclude-filter"/>
            </l:recursive-directory-list>
          </p:otherwise>
        </p:choose>
      </p:otherwise>
    </p:choose>
  </p:viewport>

</p:declare-step>

Validator.nu

The l:validator.nu step sends a document to the http://validator.nu/ web service for validation.

<p:declare-step xmlns:p="http://www.w3.org/ns/xproc" version="1.0"
                xmlns:c="http://www.w3.org/ns/xproc-step"
                xmlns:l="http://xproc.org/library"
                xpath-version="2.0"
                type="l:validator.nu">
  <!-- Posts the source document to the web service at the uri option and
       returns whatever p:http-request produces.

       Options:
         uri          address of the service
         out, showsource, level, schema, laxtype, parser, asciiquotes
                      each non-empty value is appended to the request URI as a
                      query parameter of the same name; an empty out is sent
                      as out=xml.

       Option values must be passed in their plain, unescaped form: the step
       percent-encodes them itself with encode-for-uri() so that values
       containing spaces, '&', '#' and similar characters (for example a
       space-separated list of schema URIs) yield a valid request URI. -->
  <p:input port="source"/>
  <p:output port="result"/>
  <p:option name="uri" select="'http://validator.nu/'"/>
  <p:option name="out" select="'xml'"/>
  <p:option name="showsource" select="''"/>
  <p:option name="level" select="''"/>
  <p:option name="schema" select="''"/>
  <p:option name="laxtype" select="''"/>
  <p:option name="parser" select="''"/>
  <p:option name="asciiquotes" select="''"/>

  <!-- Request URI: service address plus the query string. The XPath 2.0
       conditional expressions rely on xpath-version="2.0" declared above. -->
  <p:variable name="href"
              select="concat($uri, '?',
                             if ($out = '') then 'out=xml' else concat('out=',encode-for-uri($out)),
                             if ($showsource = '') then '' else concat('&amp;showsource=',encode-for-uri($showsource)),
                             if ($level = '') then '' else concat('&amp;level=',encode-for-uri($level)),
                             if ($schema = '') then '' else concat('&amp;schema=',encode-for-uri($schema)),
                             if ($laxtype = '') then '' else concat('&amp;laxtype=',encode-for-uri($laxtype)),
                             if ($parser = '') then '' else concat('&amp;parser=',encode-for-uri($parser)),
                             if ($asciiquotes = '') then '' else concat('&amp;asciiquotes=',encode-for-uri($asciiquotes)))"/>

  <!-- A source that already is a c:body is sent as given; anything else is
       wrapped in a c:body declared as application/xml. -->
  <p:choose>
    <p:when test="/c:body">
      <p:identity/>
    </p:when>
    <p:otherwise>
      <p:wrap wrapper="c:body" match="/"/>
      <p:add-attribute match="/c:body" attribute-name="content-type" attribute-value="application/xml"/>
    </p:otherwise>
  </p:choose>

  <!-- Build the c:request envelope around the body: POST to the computed URI. -->
  <p:wrap wrapper="c:request" match="/"/>
  <p:add-attribute match="/c:request" attribute-name="href">
    <p:with-option name="attribute-value" select="$href"/>
  </p:add-attribute>
  <p:add-attribute match="/c:request" attribute-name="method" attribute-value="post"/>

  <p:http-request/>
</p:declare-step>

http-get

The l:http-get step does a simple HTTP “GET” request. If the returned content is text/html or application/json, p:unescape-markup is used to transform the result into XML. Note that support for “unescaping” markup for non-XML content types is implementation defined.

<p:declare-step xmlns:p="http://www.w3.org/ns/xproc" version="1.0"
                xmlns:c="http://www.w3.org/ns/xproc-step"
                xmlns:l="http://xproc.org/library"
                type="l:http-get">
  <!-- Performs an HTTP GET on the href option and post-processes the reply.

       Options:
         href                   URI to fetch (required)
         username, password,
         auth-method,
         send-authorization     copied onto the c:request only when username
                                has a value
         override-content-type  copied onto the c:request when it has a value

       Result: the reply of p:http-request. text/html and application/json
       bodies are passed through p:unescape-markup; a c:body that ends up
       with element children is unwrapped. -->
  <p:output port="result"/>
  <p:option name="href" required="true"/>
  <p:option name="username"/>
  <p:option name="password"/>
  <p:option name="auth-method" select="'Basic'"/>
  <p:option name="send-authorization" select="'false'"/>
  <p:option name="override-content-type"/>

  <!-- Build the c:request from a template; the authentication attributes are
       only present when a username was supplied. -->
  <p:choose>
    <p:when test="p:value-available('username')">
      <p:template>
        <p:input port="template">
          <p:inline>
            <c:request method="get" href="{$href}" detailed="false" status-only="false"
                       username="{$username}" password="{$password}" auth-method="{$auth-method}"
                       send-authorization="{$send-authorization}"/>
          </p:inline>
        </p:input>
        <p:input port="source"><p:empty/></p:input>
        <p:with-param name="href" select="$href"/>
        <p:with-param name="username" select="$username"/>
        <p:with-param name="password" select="$password"/>
        <p:with-param name="auth-method" select="$auth-method"/>
        <p:with-param name="send-authorization" select="$send-authorization"/>
      </p:template>
    </p:when>
    <p:otherwise>
      <p:template>
        <p:input port="template">
          <p:inline>
            <c:request method="get" href="{$href}" detailed="false" status-only="false"/>
          </p:inline>
        </p:input>
        <p:input port="source"><p:empty/></p:input>
        <p:with-param name="href" select="$href"/>
      </p:template>
    </p:otherwise>
  </p:choose>

  <!-- Optionally force the content type used to interpret the response. -->
  <p:choose>
    <p:when test="p:value-available('override-content-type')">
      <p:add-attribute match="/c:request" attribute-name="override-content-type">
        <p:with-option name="attribute-value" select="$override-content-type"/>
      </p:add-attribute>
    </p:when>
    <p:otherwise>
      <p:identity/>
    </p:otherwise>
  </p:choose>

  <p:http-request/>

  <!-- Turn HTML and JSON bodies into XML. Content types are matched by prefix
       because the header value may carry parameters such as
       "; charset=utf-8". -->
  <p:choose>
    <p:when test="starts-with(/c:body/@content-type,'text/html') and /c:body/@encoding = 'base64'
                  and contains(/c:body/@content-type, 'charset')">
      <p:unescape-markup content-type="text/html" encoding="base64"/>
    </p:when>
    <p:when test="starts-with(/c:body/@content-type,'text/html') and /c:body/@encoding = 'base64'">
      <!-- Per RFC 2616, the default charset for text/* types is ISO 8859-1 -->
      <p:unescape-markup content-type="text/html" encoding="base64" charset="iso-8859-1"/>
    </p:when>
    <p:when test="starts-with(/c:body/@content-type,'text/html')">
      <p:unescape-markup content-type="text/html"/>
    </p:when>
    <p:when test="starts-with(/c:body/@content-type,'application/json') and /c:body/@encoding = 'base64'">
      <p:unescape-markup content-type="application/json" encoding="base64" charset="utf-8"/>
    </p:when>
    <p:when test="starts-with(/c:body/@content-type,'application/json')">
      <!-- JSON delivered as plain text: there is nothing to base64-decode. -->
      <p:unescape-markup content-type="application/json"/>
    </p:when>
    <p:otherwise>
      <p:identity/>
    </p:otherwise>
  </p:choose>

  <!-- Drop the c:body wrapper once it holds markup rather than text. -->
  <p:choose>
    <p:when test="/c:body/*">
      <p:unwrap match="/c:body"/>
    </p:when>
    <p:otherwise>
      <p:identity/>
    </p:otherwise>
  </p:choose>
</p:declare-step>

store

The l:store step saves its input to the file specified with the href option. (You can also specify the output method and encoding, but the other serialization parameters are not supported.)

It produces a copy of its source on the result port and the URI where the file was stored on the uri port.

<p:declare-step xmlns:l="http://xproc.org/library"
                xmlns:p="http://www.w3.org/ns/xproc"
                name="main"
                type="l:store"
                version="1.0">
  <!-- Writes the source document with p:store and passes it through.

       Ports:
         source  document to save
         result  copy of the source document
         uri     the result port of the inner p:store

       Options:
         href      where to store the document (required)
         encoding  forwarded to p:store, default 'utf-8'
         method    forwarded to p:store, default 'xml' -->
  <p:input port="source" primary="true"/>
  <p:output port="result" primary="true">
    <p:pipe step="copy" port="result"/>
  </p:output>
  <p:output port="uri">
    <p:pipe step="store" port="result"/>
  </p:output>
  <p:option name="href" required="true"/>
  <p:option name="encoding" select="'utf-8'"/>
  <p:option name="method" select="'xml'"/>

  <!-- Save the document. -->
  <p:store name="store">
    <p:input port="source">
      <p:pipe step="main" port="source"/>
    </p:input>
    <p:with-option name="encoding" select="$encoding"/>
    <p:with-option name="method" select="$method"/>
    <p:with-option name="href" select="$href"/>
  </p:store>

  <!-- p:store has no primary output, so the source is re-read here to feed
       the result port. -->
  <p:identity name="copy">
    <p:input port="source">
      <p:pipe step="main" port="source"/>
    </p:input>
  </p:identity>
</p:declare-step>

relax-ng-report

The l:relax-ng-report step performs RELAX NG validation, returning two results: the source document (validated, if validation succeeds) and a report of the validation errors (if any).

<p:declare-step version='1.0' name="main" type="l:relax-ng-report"
                xmlns:p="http://www.w3.org/ns/xproc"
                xmlns:c="http://www.w3.org/ns/xproc-step"
                xmlns:l="http://xproc.org/library">
  <!-- Validates the source against a RELAX NG schema and reports failures as
       data instead of (by default) failing the pipeline.

       Ports:
         source  document to validate
         schema  RELAX NG schema
         result  output of p:validate-with-relax-ng when validation succeeds,
                 otherwise a copy of the source
         report  empty when validation succeeds, otherwise whatever appeared
                 on the error port of the p:catch

       Options:
         dtd-attribute-values, dtd-id-idref-warnings
                       forwarded to p:validate-with-relax-ng
         assert-valid  when 'true' and the report is not empty, validation is
                       run again outside the p:try so that its error is raised -->
  <p:input port="source" primary="true"/>
  <p:input port="schema"/>
  <p:output port="result" primary="true"/>
  <p:output port="report" sequence="true">
    <p:pipe step="try" port="report"/>
  </p:output>
  <p:option name="dtd-attribute-values" select="'false'"/>
  <p:option name="dtd-id-idref-warnings" select="'false'"/>
  <p:option name="assert-valid" select="'false'"/> <!-- yes, false by default! -->

  <p:try name="try">
    <p:group>
      <p:output port="result" primary="true">
        <p:pipe step="v-rng" port="result"/>
      </p:output>
      <!-- sequence="true" is required: on success this port carries no
           document at all, which a non-sequence output does not permit. -->
      <p:output port="report" sequence="true">
        <p:empty/>
      </p:output>

      <p:validate-with-relax-ng name="v-rng" assert-valid="true">
        <p:input port="source">
          <p:pipe step="main" port="source"/>
        </p:input>
        <p:input port="schema">
          <p:pipe step="main" port="schema"/>
        </p:input>
        <p:with-option name="dtd-attribute-values" select="$dtd-attribute-values"/>
        <p:with-option name="dtd-id-idref-warnings" select="$dtd-id-idref-warnings"/>
      </p:validate-with-relax-ng>
    </p:group>
    <p:catch name="catch">
      <p:output port="result" primary="true">
        <p:pipe step="copy-source" port="result"/>
      </p:output>
      <!-- Declared as a sequence to match the group branch and the step's
           own report port. -->
      <p:output port="report" sequence="true">
        <p:pipe step="copy-errors" port="result"/>
      </p:output>
      <p:identity name="copy-source">
        <p:input port="source">
          <p:pipe step="main" port="source"/>
        </p:input>
      </p:identity>
      <p:identity name="copy-errors">
        <p:input port="source">
          <p:pipe step="catch" port="error"/>
        </p:input>
      </p:identity>
    </p:catch>
  </p:try>

  <!-- Number of documents on the report port: 0 means validation succeeded. -->
  <p:count name="count">
    <p:input port="source">
      <p:pipe step="try" port="report"/>
    </p:input>
  </p:count>

  <p:choose>
    <p:when test="$assert-valid = 'true' and /c:result != '0'">
      <!-- This isn't very efficient, but it's an error case so that's
           probably ok. In any event, it assures that l:relax-ng-report
           raises the same errors that the validation raises. -->
      <p:validate-with-relax-ng name="v-rng-assert" assert-valid="true">
        <p:input port="source">
          <p:pipe step="main" port="source"/>
        </p:input>
        <p:input port="schema">
          <p:pipe step="main" port="schema"/>
        </p:input>
        <p:with-option name="dtd-attribute-values" select="$dtd-attribute-values"/>
        <p:with-option name="dtd-id-idref-warnings" select="$dtd-id-idref-warnings"/>
      </p:validate-with-relax-ng>
    </p:when>
    <p:otherwise>
      <p:identity>
        <p:input port="source">
          <p:pipe step="try" port="result"/>
        </p:input>
      </p:identity>
    </p:otherwise>
  </p:choose>

</p:declare-step>

xml-schema-report

The l:xml-schema-report step performs W3C XML Schema validation, returning two results: the source document (validated, if validation succeeds) and a report of the validation errors (if any).

<p:declare-step version='1.0' name="main" type="l:xml-schema-report"
                xmlns:p="http://www.w3.org/ns/xproc"
                xmlns:c="http://www.w3.org/ns/xproc-step"
                xmlns:l="http://xproc.org/library">
  <!-- Validates the source against W3C XML Schema documents and reports
       failures as data instead of (by default) failing the pipeline.

       Ports:
         source  document to validate
         schema  sequence of schema documents
         result  output of p:validate-with-xml-schema when validation
                 succeeds, otherwise a copy of the source
         report  empty when validation succeeds, otherwise whatever appeared
                 on the error port of the p:catch

       Options:
         use-location-hints, try-namespaces, mode
                       forwarded to p:validate-with-xml-schema
         assert-valid  when 'true' and the report is not empty, validation is
                       run again outside the p:try so that its error is raised -->
  <p:input port="source" primary="true"/>
  <p:input port="schema" sequence="true"/>
  <p:output port="result" primary="true"/>
  <p:output port="report" sequence="true">
    <p:pipe step="try" port="report"/>
  </p:output>
  <p:option name="use-location-hints" select="'false'"/>
  <p:option name="try-namespaces" select="'false'"/>
  <p:option name="mode" select="'strict'"/>
  <p:option name="assert-valid" select="'false'"/> <!-- yes, false by default! -->

  <p:try name="try">
    <p:group>
      <p:output port="result" primary="true">
        <p:pipe step="v-xsd" port="result"/>
      </p:output>
      <!-- sequence="true" is required: on success this port carries no
           document at all, which a non-sequence output does not permit. -->
      <p:output port="report" sequence="true">
        <p:empty/>
      </p:output>

      <p:validate-with-xml-schema name="v-xsd" assert-valid="true">
        <p:input port="source">
          <p:pipe step="main" port="source"/>
        </p:input>
        <p:input port="schema">
          <p:pipe step="main" port="schema"/>
        </p:input>
        <p:with-option name="use-location-hints" select="$use-location-hints"/>
        <p:with-option name="try-namespaces" select="$try-namespaces"/>
        <p:with-option name="mode" select="$mode"/>
      </p:validate-with-xml-schema>
    </p:group>
    <p:catch name="catch">
      <p:output port="result" primary="true">
        <p:pipe step="copy-source" port="result"/>
      </p:output>
      <!-- Declared as a sequence to match the group branch and the step's
           own report port. -->
      <p:output port="report" sequence="true">
        <p:pipe step="copy-errors" port="result"/>
      </p:output>
      <p:identity name="copy-source">
        <p:input port="source">
          <p:pipe step="main" port="source"/>
        </p:input>
      </p:identity>
      <p:identity name="copy-errors">
        <p:input port="source">
          <p:pipe step="catch" port="error"/>
        </p:input>
      </p:identity>
    </p:catch>
  </p:try>

  <!-- Number of documents on the report port: 0 means validation succeeded. -->
  <p:count name="count">
    <p:input port="source">
      <p:pipe step="try" port="report"/>
    </p:input>
  </p:count>

  <p:choose>
    <p:when test="$assert-valid = 'true' and /c:result != '0'">
      <!-- This isn't very efficient, but it's an error case so that's
           probably ok. In any event, it assures that l:xml-schema-report
           raises the same errors that the validation raises. -->
      <p:validate-with-xml-schema name="v-xsd-assert" assert-valid="true">
        <p:input port="source">
          <p:pipe step="main" port="source"/>
        </p:input>
        <p:input port="schema">
          <p:pipe step="main" port="schema"/>
        </p:input>
        <p:with-option name="use-location-hints" select="$use-location-hints"/>
        <p:with-option name="try-namespaces" select="$try-namespaces"/>
        <p:with-option name="mode" select="$mode"/>
      </p:validate-with-xml-schema>
    </p:when>
    <p:otherwise>
      <p:identity>
        <p:input port="source">
          <p:pipe step="try" port="result"/>
        </p:input>
      </p:identity>
    </p:otherwise>
  </p:choose>

</p:declare-step>