parse_adf_return_simple_array();
* if( !is_array( $opensam_adf_values ) ) {
* print "Could not process Application Description File $adf_filename - $opensam_adf_values";
* return( null );
* }
*
*/
/* ============================================================================
* READ THIS, FRIEND.
* ============================================================================
*
* WHAT IS RETURNED
*
* XML and RDF data can arrive in a variety of shapes and flavors. A client module simply wants to
* query named entities (elements in the XML document) for their attributes and values.
*
* This module flattens and simplifies the RDF structure. If you do
* $rdf_simple = OpenSAM_ADF_obj->parse_adf_return_simple_array()
*
* You get an $rdf_simple that is:
*
* AN ARRAY
* The array is indexed by RDF tag names. Thus if you want to examine the element
* in the RDF:
*
* Then begin by looking at
* $rdf_simple['homePage'] (all namespace prefixes are stripped)
*
* OF ARRAYS
* Once you get an $rdf_simple[tagname] item, that item too is an array. It contains
* all the element's attributes, indexed by their attribute name (namespace prefixes stripped off).
* So, if you want to get the 'resource' attribute of that element, simply do:
* $rdf_simple['homePage']['resource'] this will return http://www.inetword.com/hp/
*
* SPECIAL 'value' VALUE
* To further simplify, we notice that RDF elements can have their distinguished value (the thing it really
* represents and that we are looking for) in a variety of locations. For example it could be as CDATA within the
* node:
* http://www.inetword.com/hp/"
* Or it could be in the magic rdf:value attribute:
*
* Or it could be...
*
* To simplify your code, we normalize all elements to have both the exact attributes listed
* in the RDF file, *and* to have whatever could be viewed as the "VALUE" be placed in a 'value'
* attribute:
* $rdf_simple['homePage']['value']
*
* AND NOW FOR COLLECTIONS
* Some RDF elements contain collections of other elements. For example, the openSamType can
* list several different types. These are represented in RDF in rdf:Bag, rdf:Alt, or rdf:Seq
* nodes.
*
* We return these collections of nodes as a sub-array in the special '_collection_' index.
* A node will therefore not have any 'value' index (unless the RDF explicitly had a something:value= attribute).
*
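* Therefore, for (example markup reconstructed; the 'os' namespace prefix is illustrative):
*   <os:openSAMTypea>
*     <rdf:Bag>
*       <rdf:li>belaunched</rdf:li>
*       <rdf:li>usestorage</rdf:li>
*     </rdf:Bag>
*   </os:openSAMTypea>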
*
* We will get:
*
* $rdf_simple['openSAMTypea']['_collection_'][0]['value'] == belaunched
* $rdf_simple['openSAMTypea']['_collection_'][1]['value'] == usestorage
*
* If you really want to vividly see what we're giving you, then use the
* dump_this_guy()
* member function. Trust me, it's worth it.
*/
// Lower-cased local names of the RDF container elements (rdf:Bag, rdf:Alt, rdf:Seq).
// Used as a lookup set: the key is the node name, the value is always 1.
$G_collection_node_names = array(
    'bag' => 1,
    'alt' => 1,
    'seq' => 1
);
// Purely structural nodes that must not get an entry of their own in the result:
// the list-item node plus every container node above ('li' stays the first key).
$G_structure_node_names_please_ignore = array( 'li' => 1 ) + $G_collection_node_names;
//
// Parse and manage an OpenSAM Application Description File
// see parse_adf_return_simple_array().
//
/**
 * Parses an OpenSAM Application Description File (an RDF/XML string) and flattens
 * it into a plain array indexed by element name.
 *
 * Typical use:
 *   $adf    = new OpenSAM_ADF( $xml_string );
 *   $values = $adf->parse_adf_return_simple_array();
 *   if( !is_array( $values ) ) { ...$values is an error message string... }
 *
 * Depends on these file-level names: the handleSimpleXmlError() callback and the
 * globals $G_err_info, $G_collection_node_names and
 * $G_structure_node_names_please_ignore.
 */
class OpenSAM_ADF {
    var $namespaces_array;  // prefix => URI map, as returned by getNamespaces( true )
    var $adf_as_string;     // raw XML text handed to the constructor
    var $simplexml_obj;     // parsed SimpleXMLElement; only ever set after a successful parse
    var $ret_simple_array;  // result of the most recent parse_adf_return_simple_array()

    // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // CONSTRUCTOR
    // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    //
    // $adf_as_string_param: the complete Application Description File as one string.
    function __construct( $adf_as_string_param ) {
        global $G_err_info;
        $this->adf_as_string = $adf_as_string_param;
        // Reset the shared error record. unset() on a variable imported with
        // `global` only drops the local alias, so assign null instead: the error
        // handler tests isset(), which is false for null.
        $G_err_info = null;
    }

    // PHP 4 style constructor name. PHP 8 no longer treats a method named after
    // the class as the constructor, so the real work lives in __construct(); this
    // stays as a plain method for code that still calls it by name.
    function OpenSAM_ADF( $adf_as_string_param ) {
        $this->__construct( $adf_as_string_param );
    }

    // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // MAIN ENTRY POINT: call this routine, everything else are helpers.
    // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    //
    // RETURNS: the simple array on success, or a string error message on failure.
    //          May be called repeatedly; the array is rebuilt on every call.
    function parse_adf_return_simple_array() {
        // The helpers report failure as a string. Anything else (true, or 0 for
        // "nothing to do") means we can carry on.
        $ret = $this->build_simplexml_obj_if_needed();
        if( is_string( $ret ) ) return( $ret );
        $ret = $this->build_namespaces_array_if_needed();
        if( is_string( $ret ) ) return( $ret );

        $this->ret_simple_array = array();
        $this->compile_simple_array_walk( $this->simplexml_obj );
        return( $this->ret_simple_array );
    }

    // just-in-time build the xml object.
    // RETURNS: true on success (or when it was already built), string err msg on error.
    function build_simplexml_obj_if_needed() {
        global $G_err_info;
        if( empty( $this->adf_as_string ) ) return( "ERROR: no Application Description String was supplied" );
        if( is_object( $this->simplexml_obj ) ) return( true ); // we had already done it.

        // simplexml_load_string() reports problems through PHP errors, so install a
        // capture routine for the duration of the call. Clear the record first so the
        // message we report belongs to this parse and not to an earlier one.
        $G_err_info = null;
        set_error_handler( "handleSimpleXmlError" );
        try {
            $parsed = simplexml_load_string( $this->adf_as_string, 'SimpleXMLElement', LIBXML_NOCDATA );
        }
        catch( Exception $parse_err ) {
            restore_error_handler(); // never leave our handler installed.
            return( "Caught simplexml_load_string() Exception.
" );
        }
        restore_error_handler();

        if( $parsed === false ) {
            // The handler may not have been invoked at all, so read the record defensively.
            $errstr  = isset( $G_err_info['errstr'] )  ? $G_err_info['errstr']  : '';
            $errline = isset( $G_err_info['errline'] ) ? $G_err_info['errline'] : '';
            return( "ERROR: Could not parse XML: " . $errstr . " line " . $errline );
        }

        // Store only a successful parse, so a failed attempt can never be mistaken
        // for an existing object on a later call.
        $this->simplexml_obj = $parsed;
        return( true ); // success.
    }

    /* The LOWDOWN on NAMESPACES
     *
     * RDF files typically combine 'vocabularies' in different namespaces to build
     * a file that can describe different things in a semi standardized way.
     *
     * The mechanism for this is xmlns:namespace="SOME URI" to define a namespace
     * of tags.
     *
     * The drawback is that the PHP SimpleXML routines require you to ask for nodes
     * from a specific namespace rather than just returning you all nodes and telling
     * you what namespace they were in.
     *
     * We therefore have helper routines that assemble all the attributes or children
     * from all namespaces of the XML.
     */

    // just-in-time gather the name spaces used in the RDF file.
    // RETURNS: true on success, 0 if not needed, string err msg on error.
    function build_namespaces_array_if_needed() {
        if( isset( $this->namespaces_array ) ) return( 0 ); // not needed.
        if( !is_object( $this->simplexml_obj ) ) return( "ERROR: simple_xml obj not created" ); //safety.
        $this->namespaces_array = $this->simplexml_obj->getNamespaces( true );
        return( true ); // success.
    }

    // The distinct namespace URIs to query. Two prefixes bound to the same URI would
    // otherwise make every child and attribute show up twice.
    // RETURNS: an array of URIs, empty when the namespaces have not been gathered yet.
    private function unique_namespace_uris() {
        if( !is_array( $this->namespaces_array ) ) return( array() );
        return( array_unique( $this->namespaces_array ) );
    }

    // Get all the children, from *every* name space, of this node.
    // Children are gathered one namespace at a time, so the result is grouped by
    // namespace rather than kept in document order.
    // NOTE(review): nodes that are in no namespace at all are not visited by this
    // loop - confirm whether Application Description Files can contain them.
    // RETURNS: an array, possibly empty.
    function get_all_I_mean_all_children( $simplexml_element ) {
        $ret_children = array();
        foreach( $this->unique_namespace_uris() as $a_namespace ) {
            foreach( $simplexml_element->children( $a_namespace ) as $achild ) {
                $ret_children[] = $achild;
            }
        }
        return( $ret_children );
    }

    // Get all the attributes, from *every* name space, of this node.
    // Keys are the attribute names without prefix; if the same name occurs in two
    // namespaces the one from the later namespace wins.
    // NOTE(review): attributes without any namespace prefix are not visited by this
    // loop - confirm whether Application Description Files can contain them.
    // RETURNS: an array of name => string value, possibly empty.
    function get_all_I_mean_all_attributes( $simplexml_element ) {
        $ret_attributes = array();
        foreach( $this->unique_namespace_uris() as $a_namespace ) {
            foreach( $simplexml_element->attributes( $a_namespace ) as $key => $aattr ) {
                $ret_attributes[$key] = (string)$aattr;
            }
        }
        return( $ret_attributes );
    }

    // Walk the XML tree and compile all the Application Description File values
    // we care about into $this->ret_simple_array, keyed by element name. When the
    // same element name occurs more than once, the last one visited wins.
    // RETURNS: $this->ret_simple_array, or nothing when handed a non-object.
    function compile_simple_array_walk( $simplexml_element ) {
        global $G_structure_node_names_please_ignore;
        if( !is_object( $simplexml_element ) ) return; // safety.

        $nodeName = $simplexml_element->getName();
        if( empty( $G_structure_node_names_please_ignore[strtolower( $nodeName )] ) ) {
            // Not a structural node, we do want to try to get the value:
            $this->ret_simple_array[ $nodeName ] = $this->get_nodes_value_array_I_mean_really( $simplexml_element );
        }

        // walk the children:
        foreach( $this->get_all_I_mean_all_children( $simplexml_element ) as $child ) {
            $this->compile_simple_array_walk( $child );
        }
        return( $this->ret_simple_array ); // hooray.
    }

    // We have an rdf:Bag, rdf:Alt, ... node that represents a collection. Build a
    // simple list with one value array per member of the collection.
    // RETURNS: an array, possibly empty.
    function get_array_from_poly_node( $simplexml_polynode ) {
        $ret_array = array();
        foreach( $this->get_all_I_mean_all_children( $simplexml_polynode ) as $onechild ) {
            $ret_array[] = $this->get_nodes_value_array_I_mean_really( $onechild );
        }
        return( $ret_array );
    }

    // Nodes can store their values in various places. Search for the value and other
    // attributes and return an array structured as:
    // RETURNS:
    //   retarr['value']        -- what we consider the best bet as the nodes "value",
    //                             or null when nothing suitable was found.
    //   retarr['attribute1']   -- an attribute that was found on the node.
    //   retarr['another_attr'] -- another attribute, along with all those found.
    //   retarr['_collection_'] -- present INSTEAD of a computed 'value' when the node's
    //                             only child is a Bag/Alt/Seq collection node.
    //
    function get_nodes_value_array_I_mean_really( $simplexml_element ) {
        global $G_collection_node_names;

        // Start from all the attributes; arrays are copied on assignment.
        $attribs = $this->get_all_I_mean_all_attributes( $simplexml_element );
        $retarr = $attribs;

        // Does it contain a collection type of node? If so, then gather all the elements
        // in the collection into one array for return.
        $children = $this->get_all_I_mean_all_children( $simplexml_element );
        if( count( $children ) == 1 ) {
            $child_element = $children[0];
            $nodeName_child = $child_element->getName();
            if( !empty( $G_collection_node_names[strtolower( $nodeName_child )] ) ) {
                $retarr['_collection_'] = $this->get_array_from_poly_node( $child_element );
                return( $retarr );
            }
        }

        // How about the actual literal contents:
        // It's a simplexml trick that you have to cast to a string() to get it.
        // Compare against '' rather than using empty(), so a literal "0" survives.
        $strvalue = trim( (string)$simplexml_element );
        if( $strvalue !== '' ) {
            $retarr['value'] = $strvalue;
            return( $retarr );
        }

        // Otherwise fall back to the attributes that can represent a 'value', in
        // priority order. We do want the ['value'] element to exist even if nothing
        // is found, hence the null default.
        $retarr['value'] = null;
        foreach( array( 'value', 'resource', 'about' ) as $attr_name ) {
            if( isset( $attribs[$attr_name] ) && $attribs[$attr_name] !== '' ) {
                $retarr['value'] = $attribs[$attr_name];
                break;
            }
        }
        return( $retarr );
    }

    // Diagnostic to help grok all this craziness: prints every entry of the most
    // recently compiled simple array. Prints only the heading when nothing has been
    // parsed yet.
    function dump_this_guy() {
        print "OpenSAM_ADF dump:
";
        if( !is_array( $this->ret_simple_array ) ) return;
        foreach( $this->ret_simple_array as $key => $arr ) {
            print htmlentities( "<$key> --> " );
            print_r( $arr );
            print "
\n";
        }
    }
}
// ERROR HANDLING HANDLER
// This callback gets called when simplexml fails to parse a file:
// Note: we want to save the 1st error, since subsequent errors can be
// side effects. The important thing, when fixing an XML file, is the 1st error.
$G_err_info;
/**
 * Error-handler callback that records the first error it sees in the global
 * $G_err_info and ignores every later one until that global is cleared.
 *
 * @param int    $errno   level of the error raised
 * @param string $errstr  error message
 * @param string $errfile file in which the error was raised
 * @param int    $errline line at which the error was raised
 */
function handleSimpleXmlError( $errno, $errstr, $errfile, $errline ) {
    global $G_err_info;

    // An error is already on record: keep it, later ones may only be side effects.
    if( isset( $G_err_info ) ) {
        return;
    }

    $G_err_info = array(
        'errno'   => $errno,
        'errstr'  => $errstr,
        'errfile' => $errfile,
        'errline' => $errline
    );
}
?>