Tryag File Manager
Home
-
Turbo Force
Current Path :
/
home
/
cluster1
/
data
/
bu01
/
1121861
/
html
/
jlex
/
php4
/
Upload File :
New :
File
Dir
/home/cluster1/data/bu01/1121861/html/jlex/php4/schema_discoverer.php4
<?php
/*
This class takes in a lexicon in a hierarchicalized form and creates a group
data structure representing an entry within the lexicon. In this case, an
additional file describing the schema is not necessary. This class is meant
to handle cases where a linguist uses Shoebox to create a hierarchy. In that
case, the Shoebox program produces a schema representing the hierarchy.

$this->cur_group is initially set to false. Technically, a group will be
created representing the entire xml document. However, as of this version,
we allow the xml document owner the liberty of choosing what tags enclose the
set of entries the document contains. Consequently, we ask for the tag name
enclosing an individual entry. The schema that is returned will represent
only this group, not the entire xml document.

The group object is a representation of an xml group. For our purposes, we
track the maximum number of times a particular field exists within a set of
entries. For more information as to why, please see the documentation of the
group class.

The method for determining the maximum number of fields within a given group
is as follows. For each group, count the number of times a particular field
exists. Once the end of the group is reached, for each field determine first
if it exists in the group template. If not, add it to the group template. If
so, check whether the template's count is less than the count seen in the
current entry; if it is, raise the template's count to that number.
*/

include_once("group.php4");

class schema_discoverer
{
    // Expat parser resource created in the constructor.
    var $parser;
    // Group currently being filled in; false until the first element opens.
    var $cur_group = false;
    // Passed by reference to every group that is constructed.
    var $ids = array();
    // Per-group field counts for the entry being read: group name => (field name => count).
    var $cur_fieldsets = array();
    // Not read or written anywhere in this class.
    var $head_tag;
    // Lower-cased names of the elements that are currently open.
    var $tag_stack = array();
    // Group name => reference to that group's accumulated field maxima.
    var $existing_groups = array();

    /*
     * Creates the XML parser and registers this object's handler methods.
     */
    function schema_discoverer()
    {
        $this->parser = xml_parser_create();
        xml_set_object($this->parser,$this);
        xml_set_element_handler($this->parser,"startHandler","endHandler");
        xml_set_character_data_handler($this->parser,"cDataHandler");
    }

    /*
     * Element-open callback.
     *
     * The first element seen becomes the initial group. After that, whenever
     * an element opens while the innermost open tag is not the current group,
     * that innermost tag evidently has children, so it is treated as a group:
     * the existing child group of that name is reused, or a new one is created
     * and attached to the current group. The opened element is then pushed on
     * the tag stack.
     *
     * NOTE(review): the call-time pass-by-reference arguments below are kept
     * exactly as written because the signatures in group.php4 are not visible
     * here; they are only accepted by old PHP versions.
     */
    function startHandler($xp, $element, $attribs)
    {
        $element = strtolower($element);
        $last_tag = end($this->tag_stack);

        if($this->cur_group === false)
        {
            $new_group = new group($element,&$this->ids,$this->cur_group);
            if(array_key_exists($element,$this->existing_groups))
            {
                // Share the field maxima already gathered for this group name.
                $new_group->fields = &$this->existing_groups[$element];
            }
            // unset first so the reference assignment does not overwrite
            // whatever $this->cur_group was previously bound to.
            unset($this->cur_group);
            $this->cur_group = &$new_group;
            unset($new_group);
        }
        else if($last_tag != $this->cur_group->name)
        {
            $parent = &$this->cur_group;
            //echo "moving down from $parent->name to ";
            unset($this->cur_group);
            if(array_key_exists($last_tag,$parent->groups))
            {
                $this->cur_group = &$parent->groups[$last_tag];
            }
            else
            {
                $new_group = new group($last_tag,&$this->ids,&$parent);
                if(array_key_exists($last_tag,$this->existing_groups))
                {
                    $new_group->fields = &$this->existing_groups[$last_tag];
                }
                $parent->add_group(&$new_group);
                $this->cur_group = &$new_group;
                unset($new_group);
            }
            //echo $this->cur_group->name."<BR>";
        }

        $this->tag_stack[] = $element;
    }

    /*
     * Element-close callback.
     *
     * Closing the current group folds the counts collected for this
     * occurrence into the group's field maxima and moves back to the parent
     * group. Closing any other element counts it as one more occurrence of
     * that field inside the current group.
     *
     * NOTE(review): the move back to the parent only happens when the group
     * recorded at least one field in $this->cur_fieldsets; a group that holds
     * nothing but sub-groups stays current. Also, if the outermost group
     * records fields, $child->parent may not be an object and the add_group
     * call would fail. Confirm both against group.php4 and the expected input.
     */
    function endHandler($xp, $element)
    {
        $element = strtolower($element);

        if($element == $this->cur_group->name)
        {
            $name = $this->cur_group->name;
            if(array_key_exists($name,$this->cur_fieldsets))
            {
                $fields = $this->cur_fieldsets[$this->cur_group->name];
                foreach($fields as $field=>$count)
                {
                    if(array_key_exists($field,$this->cur_group->fields))
                    {
                        // Keep the largest count seen in any single entry.
                        if($this->cur_group->fields[$field] < $count)
                        {
                            $this->cur_group->fields[$field] = $count;
                        }
                    }
                    else
                    {
                        $this->cur_group->fields[$field] = $count;
                    }
                }
                unset($this->cur_fieldsets[$this->cur_group->name]);
                $this->existing_groups[$element] = &$this->cur_group->fields;

                $child = &$this->cur_group;
                unset($this->cur_group);
                $this->cur_group = &$child->parent;
                $this->cur_group->add_group($child);
                unset($child);
                //echo $this->cur_group->name." <BR>";
            }
        }
        else
        {
            $name = $this->cur_group->name;
            if(!array_key_exists($name,$this->cur_fieldsets))
            {
                $this->cur_fieldsets[$name] = array();
                $this->cur_fieldsets[$name][$element] = 1;
            }
            else
            {
                if(array_key_exists($element,$this->cur_fieldsets[$name]))
                    $this->cur_fieldsets[$name][$element]++;
                else
                    $this->cur_fieldsets[$name][$element] = 1;
            }
        }

        array_pop($this->tag_stack);
    }

    /*
     * Character-data callback. Text content plays no part in the schema, so
     * it is ignored.
     */
    function cDataHandler($xp, $data)
    {
    }

    /*
     * Parses $xml_file line by line and returns the schema XML for the group
     * named $head_tag.
     *
     * $xml_file  path of the XML lexicon to read
     * $head_tag  name of the tag enclosing one entry; compared
     *            case-insensitively because element names are lower-cased
     *            while parsing
     *
     * Returns the schema as a string (XML declaration followed by the
     * group's structure_to_xml() output), or "" after echoing an error when
     * the file cannot be opened or no group named $head_tag is found among
     * the current group's child groups. Terminates the script on a parse
     * error. The parser is freed, so the object cannot be used for a second
     * file.
     */
    function create_schema($xml_file, $head_tag)
    {
        $in = fopen($xml_file,"r");
        if(!$in)
        {
            xml_parser_free($this->parser);
            echo "ERROR: could not open ".$xml_file." <BR>";
            return "";
        }

        // Compare against false explicitly so a final line of "0" is not
        // mistaken for end of file.
        while(($line = fgets($in)) !== false)
        {
            // Escape every ampersand so stray "&" characters in the data
            // cannot abort the parse; character data is discarded anyway.
            $xml = str_replace("&","&amp;",$line);
            $good_parse = xml_parse($this->parser,$xml,false);
            if(!$good_parse)
            {
                $bad_line = xml_get_current_line_number($this->parser);
                fclose($in);
                die("schema_discoverer : BAD PARSE: ".$bad_line."<BR>");
            }
        }
        fclose($in);
        xml_parser_free($this->parser);

        $wanted = strtolower($head_tag);
        $g = false;
        // cur_group is still false when the document contained no elements.
        if(is_object($this->cur_group))
        {
            foreach($this->cur_group->groups as $group)
            {
                if($group->name == $wanted)
                {
                    $g = $group;
                    break;
                }
            }
        }

        if(!is_object($g))
        {
            echo "ERROR: records must be children of the root node <BR>";
            return "";
        }

        $schema = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
        $schema .= $g->structure_to_xml();
        return $schema;
    }
}

/*
Example usage:

set_time_limit(0);
//error_reporting(1);
$sd = new schema_discoverer();
//$schema = $sd->create_schema("ActiveNahuatl_2005_august.xml","refgroup");
$schema = $sd->create_schema("modified_test.xml","refgroup");
echo "schema: <pre>$schema</pre>";
$out = fopen("../nahuatl/schema.xml","w");
fwrite($out,$schema);
fclose($out);
*/
?>