Location: PHPKode > projects > Html2ps > html2ps-2.0.43/parser.xhtml.class.php
/**
 * Parser that turns already-prepared XHTML text into the box tree used
 * for rendering.
 */
class ParserXHTML extends Parser {
  /**
   * Parse an XHTML document and build its box tree.
   *
   * Steps, in order: build the DOM tree, apply the document's BASE element
   * (if any) to the pipeline, collect CSS from STYLE / LINK nodes, set up the
   * page media (when $media is not null) and convert the DOM into boxes.
   *
   * The script is terminated with die() when the document cannot be parsed.
   *
   * @param string $html     XHTML source to parse
   * @param object $pipeline current pipeline; supplies base URL, CSS and media handling
   * @param mixed  $media    media description, or null to skip the media setup
   * @return mixed box tree produced by create_pdf_box() (returned by reference)
   */
  function &process($html, &$pipeline, &$media) {
    // Run the XML parser on the XHTML we've prepared
    $dom_tree = TreeBuilder::build($html);

    // Check if parser returned valid document
    if (is_null($dom_tree)) {
      error_log(sprintf("Cannot parse document: %s", $pipeline->get_base_url()));
      die("HTML2PS Error");
    }

    /**
     * Detect the base URI for this document.
     *
     * According to the HTML 4.01 p. 12.4.1:
     * User agents must calculate the base URI according to the following
     * precedences (highest priority to lowest):
     *
     * 1. The base URI is set by the BASE element.
     * 2. The base URI is given by meta data discovered during a protocol
     *    interaction, such as an HTTP header (see [RFC2616]).
     * 3. By default, the base URI is that of the current document. Not all
     *    HTML documents have a base URI (e.g., a valid HTML document may
     *    appear in an email and may not be designated by a URI). Such HTML
     *    documents are considered erroneous if they contain relative URIs
     *    and rely on a default base URI.
     */

    /**
     * Check if BASE element present; use its first occurrence
     */
    $this->_scan_base($dom_tree, $pipeline);

    /**
     * @todo fall back to the protocol metadata
     */

    /**
     * Parse STYLE / LINK nodes containing CSS references and definitions.
     * This should be done here, as the document body may include a STYLE node
     * (this violates the HTML standard, but appears rather often on the Web).
     */
    $css =& $pipeline->get_current_css();
    $css->scan_styles($dom_tree, $pipeline);

    if (!is_null($media)) {
      // Setup media size and margins
      $pipeline->get_page_media(1, $media);
    }

    $body =& traverse_dom_tree_pdf($dom_tree);
    $box =& create_pdf_box($body, $pipeline);

    return $box;
  }

  /**
   * Depth-first search for the first BASE element under $root; when found,
   * its 'href' becomes the pipeline's current base URL.
   *
   * @param object $root     DOM node to start the search from
   * @param object $pipeline current pipeline
   * @return bool true if a BASE element was found (the search stops at the
   *              first one), false otherwise
   */
  function _scan_base(&$root, &$pipeline) {
    // NOTE(review): the case labels below are reconstructed (element node
    // falling through to document node); confirm against the upstream file.
    switch ($root->node_type()) {
    case XML_ELEMENT_NODE:
      if ($root->tagname() === 'base') {
        /**
         * See HTML 4.01 p 12.4
         * href - this attribute specifies an absolute URI that acts as the
         * base URI for resolving relative URIs.
         *
         * At this moment the pipeline object has the current document URI on
         * the top of the stack; we should replace it with the value of the
         * 'href' attribute of the BASE tag.
         *
         * To handle (possibly) incorrect values, we use the 'guess_url'
         * function; in this case if the 'href' attribute contains an absolute
         * value (as it SHOULD), it will be used; if it is missing or is
         * relative, we'll get a more or less usable value based on the
         * current document URI.
         */
        $new_url = $pipeline->guess_url($root->get_attribute('href'));

        // NOTE(review): pop_base_url() / push_base_url() are assumed to be the
        // pipeline's base-URL stack operations (counterparts of
        // get_base_url()) - confirm against the Pipeline class.
        $pipeline->pop_base_url();
        $pipeline->push_base_url($new_url);

        return true;
      }

      // We continue processing here! (intentional fall-through)
    case XML_DOCUMENT_NODE:
      for ($child = $root->first_child(); $child; $child = $child->next_sibling()) {
        // Propagate success as an explicit true: a bare "return;" yields NULL,
        // which callers further up treat as "not found" and keep scanning,
        // so a later BASE element could override the first one.
        if ($this->_scan_base($child, $pipeline)) {
          return true;
        }
      }

      return false;
    }

    return false;
  }
}
Return current item: Html2ps