parse_adf_return_simple_array();
 *   if( !is_array( $opensam_adf_values ) ) {
 *       print "Could not process Application Description File $adf_filename - $opensam_adf_values";
 *       return( null );
 *   }
 *
 */

/* ============================================================================
 * READ THIS, FRIEND.
 * ============================================================================
 *
 * WHAT IS RETURNED
 *
 * XML and RDF data can arrive in a variety of shapes and flavors. A client module simply wants to
 * query named entities (elements in the XML document) for their attributes and values.
 *
 * This module flattens and simplifies the RDF structure. If you do
 *     $rdf_simple = $OpenSAM_ADF_obj->parse_adf_return_simple_array()
 *
 * You get an $rdf_simple that is:
 *
 * AN ARRAY
 *   The array is indexed by RDF tag names. Thus if you want to examine the element
 *   in the RDF:
 *       <ns:homePage rdf:resource="http://www.inetword.com/hp/" />
 *   Then begin by looking at
 *       $rdf_simple['homePage']     (all namespace prefixes are stripped)
 *
 * OF ARRAYS
 *   Once you get an $rdf_simple[tagname] item, that item too is an array. It contains
 *   all the element's attributes, indexed by their attribute name (namespace prefixes stripped off).
 *   So, if you want to get the 'resource' attribute of that element, simply do:
 *       $rdf_simple['homePage']['resource']     this will return http://www.inetword.com/hp/
 *
 * SPECIAL 'value' VALUE
 *   To further simplify, we notice that RDF elements can have their distinguished value (the thing it really
 *   represents and that we are looking for) in a variety of locations. For example it could be as CDATA within the
 *   node:
 *       <ns:homePage>http://www.inetword.com/hp/</ns:homePage>
 *   Or it could be in the magic rdf:value attribute:
 *       <ns:homePage rdf:value="http://www.inetword.com/hp/" />
 *   Or it could be...
 *
 *   To simplify your code, we normalize all elements to have both the exact attributes listed
 *   in the RDF file, *and* to have whatever could be viewed as the "VALUE" be placed in a 'value'
 *   attribute:
 *       $rdf_simple['homePage']['value']
 *
 * AND NOW FOR COLLECTIONS
 *   Some RDF elements contain collections of other elements.
For example, the openSamType can
 * list several different types. These are represented in RDF in rdf:Bag, rdf:Alt, or rdf:Seq
 * nodes.
 *
 * We return these collections of nodes as a sub-array in the special '_collection_' index.
 * A node will therefore not have any 'value' index (unless the RDF explicitly had a
 * something:value= attribute).
 *
 * Therefore, for:
 *     <ns:openSAMTypea>
 *         <rdf:Bag>
 *             <rdf:li>belaunched</rdf:li>
 *             <rdf:li>usestorage</rdf:li>
 *         </rdf:Bag>
 *     </ns:openSAMTypea>
 *
 * We will get:
 *
 *     $rdf_simple['openSAMTypea']['_collection_'][0]['value'] == belaunched
 *     $rdf_simple['openSAMTypea']['_collection_'][1]['value'] == usestorage
 *
 * If you really want to vividly see what we're giving you, then use the
 *     dump_this_guy()
 * member function. Trust me, it's worth it.
 */

// Lower-cased local names of the RDF container elements (rdf:Bag, rdf:Alt, rdf:Seq).
// A node whose only child is one of these is reported through the '_collection_' index.
$G_collection_node_names = array( 'bag'=>1, 'alt'=>1, 'seq'=>1 );

// Lower-cased local names of purely structural elements. The tree walk never
// creates a top-level entry in the simple array for these.
$G_structure_node_names_please_ignore = array( 'li'=>1, 'bag'=>1, 'alt'=>1, 'seq'=>1 );

//
// Parse and manage an OpenSAM Application Description File
// see parse_adf_return_simple_array().
//
class OpenSAM_ADF
{
    // prefix => URI map from SimpleXMLElement::getNamespaces( true ); unset until first parse.
    public $namespaces_array;
    // The raw ADF document text handed to the constructor.
    public $adf_as_string;
    // The parsed SimpleXMLElement root; unset until a parse succeeds.
    public $simplexml_obj;
    // The flattened result built by the last parse_adf_return_simple_array() call.
    public $ret_simple_array;

    // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // CONSTRUCTOR
    // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    //
    // $adf_as_string_param: the complete ADF (RDF/XML) document as a string.
    //
    // Also clears any parse error remembered in the global $G_err_info.
    // Assigning null (rather than unset()) is deliberate: unset() on a variable
    // imported with 'global' only removes the local alias and leaves the real
    // global untouched, so a stale error would survive.
    public function __construct( $adf_as_string_param )
    {
        global $G_err_info;

        $this->adf_as_string = $adf_as_string_param;
        $G_err_info = null;     // so our error reporting gets reset.
    }

    // PHP 4 style constructor name, kept as a plain method so that legacy code
    // calling it explicitly (e.g. from a subclass) keeps working. PHP 8 no longer
    // treats a method named after the class as a constructor, hence __construct().
    public function OpenSAM_ADF( $adf_as_string_param )
    {
        $this->__construct( $adf_as_string_param );
    }

    // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // MAIN ENTRY POINT: call this routine, everything else are helpers.
    // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    //
    // RETURNS: the simple array described in the header comment on success,
    //          or a string error message on failure. Callers tell the two apart
    //          with is_array(). May be called repeatedly; each call rebuilds
    //          the result from the already-parsed document.
    public function parse_adf_return_simple_array()
    {
        // The helpers report failure as a string. Any other return (true, or 0
        // for "nothing to do") means we can carry on.
        $ret = $this->build_simplexml_obj_if_needed();
        if( is_string( $ret ) )
            return( $ret );

        $ret = $this->build_namespaces_array_if_needed();
        if( is_string( $ret ) )
            return( $ret );

        $this->ret_simple_array = array();
        $this->compile_simple_array_walk( $this->simplexml_obj );

        return( $this->ret_simple_array );
    }

    // just-in-time build the xml object.
    // RETURNS: true on success or when the object was already built,
    //          string err msg on error.
    public function build_simplexml_obj_if_needed()
    {
        global $G_err_info;

        if( empty( $this->adf_as_string ) )
            return( "ERROR: no Application Description String was supplied" );

        if( isset( $this->simplexml_obj ) )
            return( true );     // we had already done it.

        // The PHP simplexml() routines have poor error handling. We have to take extreme measures
        // to capture the errors.

        // Forget any error left over from an earlier parse so that the message
        // below always describes *this* document.
        $G_err_info = null;

        // 1. Set an error capture routine:
        set_error_handler( "handleSimpleXmlError" );

        // 2. Capture exceptions just in case:
        try {
            $parsed = simplexml_load_string( $this->adf_as_string, 'SimpleXMLElement', LIBXML_NOCDATA );
        }
        catch( Exception $parse_err ) {
            restore_error_handler();    // never leave our handler installed.
            return( "Caught simplexml_load_string() Exception.\n" );
        }
        restore_error_handler();

        if( $parsed === false ) {
            // Leave $this->simplexml_obj unset: storing false would make the
            // isset() test above report "already built" on the next call.
            // Note: 'errline' is the line number PHP passed to the error handler.
            $errstr  = isset( $G_err_info['errstr'] )  ? $G_err_info['errstr']  : 'unknown error';
            $errline = isset( $G_err_info['errline'] ) ? $G_err_info['errline'] : '?';
            return( "ERROR: Could not parse XML: " . $errstr . " line " . $errline );
        }

        $this->simplexml_obj = $parsed;
        return( true );     // success.
    }

    /* The LOWDOWN on NAMESPACES
     *
     * RDF files typically combine 'vocabularies' in different namespaces to build
     * a file that can describe different things in a semi standardized way.
     *
     * The mechanism for this is xmlns:namespace="SOME URI" to define a namespace
     * of tags.
     *
     * The drawback is that the PHP SimpleXML routines require you to ask for nodes
     * from a specific namespace rather than just returning you all nodes and telling
     * you what namespace they were in.
     *
     * We therefore have helper routines that assemble all the attributes or children
     * from all namespaces of the XML.
     *
     * LIMITATION: the helpers only query the namespaces reported by getNamespaces(),
     * so elements and attributes that belong to no namespace at all are not returned.
     */

    // just-in-time gather the name spaces used in the
    // RDF file.
    // RETURNS: true on success, 0 if not needed, string err msg on error.
    public function build_namespaces_array_if_needed()
    {
        if( isset( $this->namespaces_array ) )
            return( 0 );    // not needed.

        if( !isset( $this->simplexml_obj ) )
            return( "ERROR: simple_xml obj not created" );  //safety.

        // true => recurse, so namespaces first used deep in the tree are included.
        $this->namespaces_array = $this->simplexml_obj->getNamespaces( true );
        return( true );     // success.
    }

    // Get all the children, from *every* name space, of this node.
    // Children come back grouped by namespace, not in document order.
    // RETURNS: an array of SimpleXMLElement, possibly empty.
    public function get_all_I_mean_all_children( $simplexml_element )
    {
        $ret_children = array();
        if( !is_array( $this->namespaces_array ) )
            return( $ret_children );    // namespaces not gathered yet, nothing to ask for.

        foreach( $this->namespaces_array as $a_namespace ) {
            foreach( $simplexml_element->children( $a_namespace ) as $achild ) {
                $ret_children[] = $achild;
            }
        }
        return( $ret_children );
    }

    // Get all the attributes, from *every* name space, of this node.
    // RETURNS: an array of attribute name (no prefix) => string value, possibly empty.
    //          If two namespaces carry the same attribute name, the later one wins.
    public function get_all_I_mean_all_attributes( $simplexml_element )
    {
        $ret_attributes = array();
        if( !is_array( $this->namespaces_array ) )
            return( $ret_attributes );  // namespaces not gathered yet, nothing to ask for.

        foreach( $this->namespaces_array as $a_namespace ) {
            foreach( $simplexml_element->attributes( $a_namespace ) as $key => $aattr ) {
                $ret_attributes[$key] = (string)$aattr;
            }
        }
        return( $ret_attributes );
    }

    // Walk the XML tree and compile all the Application Description File values
    // we care about into $this->ret_simple_array, keyed by element name.
    // When the same element name occurs more than once, the last one visited wins.
    // RETURNS: $this->ret_simple_array (null if handed something that is not an object).
    public function compile_simple_array_walk( $simplexml_element )
    {
        global $G_structure_node_names_please_ignore;

        if( !is_object( $simplexml_element ) )
            return;     // safety.

        $nodeName = $simplexml_element->getName();
        if( empty( $G_structure_node_names_please_ignore[strtolower( $nodeName )] ) ) {
            // Not a structural node, we do want to try to get the value:
            $this->ret_simple_array[$nodeName] = $this->get_nodes_value_array_I_mean_really( $simplexml_element );
        }

        // walk the children:
        foreach( $this->get_all_I_mean_all_children( $simplexml_element ) as $child ) {
            $this->compile_simple_array_walk( $child );
        }

        return( $this->ret_simple_array );  // hooray.
    }

    // We have an rdf:Bag, rdf:Alt, ... node that represents a collection. Build a
    // simple list with one value-array per member of the collection.
    // RETURNS: a 0-based array of arrays as produced by
    //          get_nodes_value_array_I_mean_really(), possibly empty.
    public function get_array_from_poly_node( $simplexml_polynode )
    {
        $ret_array = array();
        foreach( $this->get_all_I_mean_all_children( $simplexml_polynode ) as $onechild ) {
            $ret_array[] = $this->get_nodes_value_array_I_mean_really( $onechild );
        }
        return( $ret_array );
    }

    // Nodes can store their values in various places. Search for the value and other
    // attributes and return an array structured as:
    // RETURNS:
    //      retarr['value'] -- what we consider the best bet as the nodes "value"
    //                         (null when nothing suitable was found).
    //      retarr['attribute1'] - an attribute that was found on the node.
    //      retarr['another_attr'] - another attribute, along with all those found.
    //  or, when the node's only child is an rdf:Bag / rdf:Alt / rdf:Seq:
    //      retarr['_collection_'] -- list of value-arrays, one per collection member
    //                         (plus the node's own attributes; no 'value' is synthesized).
    //
    public function get_nodes_value_array_I_mean_really( $simplexml_element )
    {
        global $G_collection_node_names;

        // Start from the node's attributes. PHP arrays are copied on assignment,
        // so $retarr can be modified without touching $attribs.
        $attribs = $this->get_all_I_mean_all_attributes( $simplexml_element );
        $retarr  = $attribs;

        // Does it contain a collection type of node? If so, then gather all the elements
        // in the collection into one array for return.
        $children = $this->get_all_I_mean_all_children( $simplexml_element );
        if( count( $children ) == 1 ) {
            $child_element  = $children[0];
            $nodeName_child = strtolower( $child_element->getName() );
            if( !empty( $G_collection_node_names[$nodeName_child] ) ) {
                $retarr['_collection_'] = $this->get_array_from_poly_node( $child_element );
                return( $retarr );
            }
        }

        // How about the actual literal contents:
        // It's a simplexml trick that you have to cast to a string() to get it. Nothing else will.
        // Compare against '' rather than using empty(): a literal "0" is a real value.
        $strvalue = trim( (string)$simplexml_element );
        if( $strvalue !== '' ) {
            $retarr['value'] = $strvalue;
            return( $retarr );
        }

        // How about some of the attributes that represent a 'value', in priority order:
        foreach( array( 'value', 'resource', 'about' ) as $value_attr ) {
            if( isset( $attribs[$value_attr] ) && $attribs[$value_attr] !== '' ) {
                $retarr['value'] = $attribs[$value_attr];
                return( $retarr );
            }
        }

        // not found, tried our best. Note: we do want this ['value'] array element to exist.
        $retarr['value'] = null;
        return( $retarr );
    }

    // Diagnostic to help grok all this craziness: prints every entry of the
    // last parse result. Prints only the heading if nothing has been parsed yet.
    public function dump_this_guy()
    {
        print "OpenSAM_ADF dump:\n";
        if( !is_array( $this->ret_simple_array ) )
            return;

        foreach( $this->ret_simple_array as $key => $arr ) {
            print htmlentities( "<$key> --> " );
            print_r( $arr );
            print "\n\n";
        }
    }
}

// ERROR HANDLING HANDLER
// This callback gets called when simplexml fails to parse a file:
// Note: we want to save the 1st error, since subsequent errors can be
// side effects. The important thing, when fixing an XML file, is the 1st error.
//
// The error is remembered in the global $G_err_info as an array with the keys
// 'errno', 'errstr', 'errfile' and 'errline'. It stays there until something
// resets $G_err_info to null.
function handleSimpleXmlError( $errno, $errstr, $errfile, $errline )
{
    global $G_err_info;

    if( !isset( $G_err_info ) ) {
        $G_err_info = array(
            'errno'   => $errno,
            'errstr'  => $errstr,
            'errfile' => $errfile,
            'errline' => $errline
        );
    }
}
?>