Tryag File Manager
Home
-
Turbo Force
Current Path :
/
home
/
cluster1
/
data
/
bu01
/
1121861
/
html
/
jlex
/
php5
/
Upload File :
New :
File
Dir
/home/cluster1/data/bu01/1121861/html/jlex/php5/db_modifier.php5~
<?php
/*
 NOTE: In PHP4, you do not need to first decode cdata from utf8.
 PHP5 assumes the source is in utf8, so you must first decode the cdata.
 (This class does that by asking the XML parser for ISO-8859-1 output,
 see create_parser().)
*/
include_once("schema_loader.php5");

/**
 * Streams an XML database through a SAX parser and either
 *  - collects the element names found inside each entry (get_fields), or
 *  - writes a copy of the database that additionally contains "stripped"
 *    variants of selected fields and an <alpha> sort key per entry (modify).
 *
 * "Stripping" replaces single characters by substitutes taken from two kinds
 * of tables: a table applied everywhere in a value (symbols) and a table that
 * is applied only to the first character of a value (first_chars).
 *
 * All text is processed as single-byte ISO-8859-1: the tables are decoded
 * from UTF-8 and split byte by byte, and the output file is declared as
 * ISO-8859-1.
 */
class db_modifier
{
    public $parser;
    public $out;
    public $cur_data;
    public $count;
    public $table;
    public $symbols;
    public $substitutes;
    public $first_chars;
    public $pattern;
    public $alpha_symbols;
    public $alpha_substitutes;
    public $alpha_first_chars;
    public $alpha_pattern;
    public $sort_fields;
    public $has_sort_fields;
    public $alpha;
    public $cur_alpha_index;
    public $head_tag;
    public $produce_stripped_version;
    public $strippable_fields;
    public $fields;
    public $fields_to_strip;
    public $in_entry;
    public $group_names;
    public $check_all_fields;

    /**
     * Puts every table and flag into a defined empty state.
     */
    public function __construct()
    {
        $this->check_all_fields = false;
        $this->count = 0;
        $this->cur_data = "";

        $this->symbols = array();
        $this->substitutes = array();
        $this->first_chars = array();
        $this->pattern = "";

        $this->alpha_symbols = array();
        $this->alpha_substitutes = array();
        $this->alpha_first_chars = array();
        $this->alpha_pattern = "";

        $this->sort_fields = array();
        $this->has_sort_fields = false;
        $this->alpha = "";
        $this->cur_alpha_index = 10;

        $this->strippable_fields = array();
        $this->fields = array();
        $this->fields_to_strip = array();

        $this->produce_stripped_version = false;
        $this->in_entry = false;
    }

    /**
     * Appends the space-separated field names to the sort-field list.
     * The position in the list is the priority: lower index wins.
     *
     * @param string $sort_fields e.g. "lxa lxo"
     */
    public function get_sort_fields($sort_fields)
    {
        foreach (explode(" ", $sort_fields) as $field) {
            $this->sort_fields[] = $field;
        }
    }

    /**
     * Reads a list of field names, one per line, from a file.
     *
     * @param string $fields path of the file to read
     * @return array trimmed lines in file order; empty when the file is
     *               empty or cannot be opened (fopen reports the warning)
     */
    public function get_fields_to_strip($fields)
    {
        $fields_to_strip = array();
        $in = fopen($fields, "r");
        if ($in === false) {
            return $fields_to_strip;
        }
        // Compare against false explicitly so that a line consisting of "0"
        // does not end the loop early.
        while (($line = fgets($in)) !== false) {
            $fields_to_strip[] = trim($line);
        }
        fclose($in);
        return $fields_to_strip;
    }

    /**
     * Loads the "replace everywhere" table used for the <alpha> sort key.
     *
     * @param string $textarea lines of the form "<chars> = <substitute>";
     *                         a substitute of '' means "delete"
     */
    public function make_alpha_symbols_table($textarea)
    {
        foreach ($this->parse_mapping_lines($textarea) as $mapping) {
            foreach ($mapping[0] as $symbol) {
                $this->alpha_symbols[] = $symbol;
                $this->alpha_substitutes[] = $mapping[1];
            }
        }
        $symbols_string = $this->bracket_chars($this->alpha_symbols, false);
        if ($symbols_string != "") {
            $this->alpha_pattern = "[" . $symbols_string . "]";
        }
    }

    /**
     * Loads the "replace at start of value" table used for the <alpha> key.
     *
     * @param string $textarea same format as make_alpha_symbols_table()
     */
    public function make_alpha_first_chars_table($textarea)
    {
        foreach ($this->parse_mapping_lines($textarea) as $mapping) {
            foreach ($mapping[0] as $symbol) {
                $this->alpha_first_chars[$symbol] = $mapping[1];
            }
        }
        $first_chars_string = $this->bracket_chars(array_keys($this->alpha_first_chars), true);
        if ($first_chars_string != "") {
            if ($this->alpha_pattern != "") {
                $this->alpha_pattern .= "|";
            }
            $this->alpha_pattern .= "^[" . $first_chars_string . "]";
        }
    }

    /**
     * Loads the "replace everywhere" table used for the *_s stripped fields.
     *
     * @param string $textarea same format as make_alpha_symbols_table()
     */
    public function make_symbols_table($textarea)
    {
        foreach ($this->parse_mapping_lines($textarea) as $mapping) {
            foreach ($mapping[0] as $symbol) {
                $this->symbols[] = $symbol;
                $this->substitutes[] = $mapping[1];
            }
        }
        $symbols_string = $this->bracket_chars($this->symbols, false);
        if ($symbols_string != "") {
            $this->pattern = "[" . $symbols_string . "]";
        }
    }

    /**
     * Loads the "replace at start of value" table used for the *_s fields.
     *
     * @param string $textarea same format as make_alpha_symbols_table()
     */
    public function make_first_chars_table($textarea)
    {
        foreach ($this->parse_mapping_lines($textarea) as $mapping) {
            foreach ($mapping[0] as $symbol) {
                $this->first_chars[$symbol] = $mapping[1];
            }
        }
        $first_chars_string = $this->bracket_chars(array_keys($this->first_chars), true);
        if ($first_chars_string != "") {
            if ($this->pattern != "") {
                $this->pattern .= "|";
            }
            $this->pattern .= "^[" . $first_chars_string . "]";
        }
    }

    /**
     * Builds the sort key for a value from the alpha tables.
     *
     * @param string $value
     * @return string
     */
    public function alpha_strip($value)
    {
        return $this->apply_tables(
            $value,
            $this->alpha_symbols,
            $this->alpha_substitutes,
            $this->alpha_first_chars
        );
    }

    /**
     * Builds the stripped variant of a value from the symbol tables.
     *
     * @param string $value
     * @return string
     */
    public function strip($value)
    {
        return $this->apply_tables(
            $value,
            $this->symbols,
            $this->substitutes,
            $this->first_chars
        );
    }

    /**
     * SAX start-tag callback. Attributes are not copied to the output.
     *
     * When producing output, tags outside an entry are always copied
     * (followed by a newline); tags inside an entry are copied only when the
     * element is listed in $fields.
     */
    public function startHandler($xp, $element, $attribs)
    {
        $element = strtolower($element);
        if ($element === $this->head_tag) {
            $this->in_entry = true;
        }
        if (!$this->produce_stripped_version) {
            return;
        }
        if (!$this->in_entry) {
            fwrite($this->out, "<$element>\n");
        } elseif (in_array($element, $this->fields, true)) {
            fwrite($this->out, "<$element>");
        }
    }

    /**
     * SAX end-tag callback.
     *
     * Collecting mode (get_fields): records the element as a field when it
     * is inside an entry, and as strippable when its text would be changed
     * by strip().
     *
     * Output mode (modify): remembers the sort key of the highest-priority
     * sort field seen in the entry, writes <alpha> just before the entry's
     * closing tag, writes the element's text and closing tag, and appends a
     * <name_s> element for fields selected for stripping.
     */
    public function endHandler($xp, $element)
    {
        $element = strtolower($element);
        $is_head = ($element === $this->head_tag);

        // The text is trimmed and escaped once per element, not once per
        // parser chunk, so whitespace inside a value survives even when the
        // parser delivers the text in several pieces.
        $value = $this->escape_xml(trim($this->cur_data));

        if ($this->produce_stripped_version) {
            if ($this->has_sort_fields) {
                $index = array_search($element, $this->sort_fields, true);
                if ($index !== false && $index < $this->cur_alpha_index) {
                    $this->alpha = $this->alpha_strip($value);
                    $this->cur_alpha_index = $index;
                }
                if ($is_head) {
                    fwrite($this->out, "<alpha>$this->alpha</alpha>\n");
                    $this->alpha = "";
                    $this->cur_alpha_index = count($this->sort_fields);
                }
            }
            if (!$this->in_entry || in_array($element, $this->fields, true)) {
                fwrite($this->out, "$value</$element>\n");
            }
        } elseif ($this->in_entry && !in_array($element, $this->fields, true)) {
            $this->fields[] = $element;
        }

        if ($this->pattern != "" && $this->contains_strippable($value)) {
            if ($this->produce_stripped_version) {
                if (in_array($element, $this->fields_to_strip, true)) {
                    $stripped_value = $this->strip($value);
                    fwrite($this->out, "<" . $element . "_s>$stripped_value</" . $element . "_s>\n");
                }
            } elseif (!in_array($element, $this->strippable_fields, true)) {
                $this->strippable_fields[] = $element;
            }
        }

        if ($is_head) {
            $this->in_entry = false;
        }
        $this->cur_data = "";
    }

    /**
     * SAX character-data callback: accumulates raw text until the next end
     * tag. The parser may call this several times for one text node, so no
     * trimming or escaping is done here.
     */
    public function cDataHandler($xp, $data)
    {
        $this->cur_data .= $data;
    }

    /**
     * Converts named HTML entities and decimal numeric references to
     * single-byte ISO-8859-1 characters.
     *
     * @param string $string
     * @return string
     */
    public function unhtmlentities($string)
    {
        // Pin the table to ISO-8859-1 so named entities produce the same
        // single-byte characters as the chr() used for numeric references.
        $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, "ISO-8859-1"));
        $ret = strtr($string, $trans_tbl);
        return preg_replace_callback('/&#([0-9]+);/', array($this, "numeric_entity_to_char"), $ret);
    }

    /**
     * Scans an XML file and fills $fields (elements seen inside entries) and
     * $strippable_fields (elements whose text strip() would change); both
     * lists are sorted afterwards.
     *
     * @param string $xml         path of the XML file
     * @param string $head_tag    lower-case name of the element enclosing an entry
     * @param string $symbols     symbols table text, may be blank
     * @param string $first_chars first-chars table text, may be blank
     */
    public function get_fields($xml, $head_tag, $symbols, $first_chars)
    {
        $this->produce_stripped_version = false;
        $this->head_tag = $head_tag;

        if (trim($symbols) != "") {
            $this->make_symbols_table($symbols);
        }
        if (trim($first_chars) != "") {
            $this->make_first_chars_table($first_chars);
        }

        $this->parser = $this->create_parser();
        $this->parse_file($xml);
        $this->release_parser();

        sort($this->strippable_fields);
        sort($this->fields);
    }

    /**
     * Writes a copy of $xml to $new_xml_name with stripped fields and sort
     * keys added.
     *
     * @param string $xml               path of the source XML file
     * @param string $new_xml_name      path of the file to create
     * @param string $head_tag          lower-case name of the entry element
     * @param string $sort_fields       space-separated sort fields in priority
     *                                  order, or "" for no <alpha> element
     * @param string $alpha_symbols     symbols table for the sort key
     * @param string $alpha_first_chars first-chars table for the sort key
     * @param string $symbols           symbols table for *_s fields
     * @param string $first_chars       first-chars table for *_s fields
     * @param array  $fields            elements to keep inside entries
     * @param array  $fields_to_strip   elements that get a *_s companion
     */
    public function modify(
        $xml,
        $new_xml_name,
        $head_tag,
        $sort_fields,
        $alpha_symbols,
        $alpha_first_chars,
        $symbols,
        $first_chars,
        $fields,
        $fields_to_strip
    ) {
        $this->produce_stripped_version = true;
        $this->fields = $fields;
        $this->fields_to_strip = $fields_to_strip;
        $this->head_tag = $head_tag;

        if ($sort_fields != "") {
            $this->get_sort_fields($sort_fields);
            $this->has_sort_fields = true;
            $this->fields[] = "alpha";
            // Start from the same "nothing chosen yet" value that is used
            // after each entry, so the first entry is treated like the rest
            // regardless of how many sort fields there are.
            $this->cur_alpha_index = count($this->sort_fields);
        } else {
            $this->has_sort_fields = false;
        }

        if (trim($symbols) != "") {
            $this->make_symbols_table($symbols);
        }
        if (trim($first_chars) != "") {
            $this->make_first_chars_table($first_chars);
        }
        if (trim($alpha_symbols) != "") {
            $this->make_alpha_symbols_table($alpha_symbols);
        }
        if (trim($alpha_first_chars) != "") {
            $this->make_alpha_first_chars_table($alpha_first_chars);
        }

        $this->out = fopen($new_xml_name, "w");
        if ($this->out === false) {
            // fopen has already raised a warning; nothing can be written.
            return;
        }
        fwrite($this->out, "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n");

        $this->parser = $this->create_parser();
        $this->parse_file($xml);
        fclose($this->out);
        $this->release_parser();
    }

    /**
     * Parses table text into a list of array(symbols, substitute) pairs.
     *
     * The text is unslashed, decoded from UTF-8 to ISO-8859-1 and trimmed.
     * Each line is "<chars> = <substitute>"; every byte of <chars> becomes
     * one symbol. Lines without the " = " separator are skipped, because
     * treating them as mappings would mark every character of the line
     * (including spaces) for deletion.
     */
    private function parse_mapping_lines($textarea)
    {
        $text = trim($this->to_latin1(stripslashes($textarea)));
        $mappings = array();
        foreach (explode("\n", $text) as $line) {
            $vals = explode(" = ", $line);
            if (count($vals) < 2) {
                continue;
            }
            $substitute = trim($vals[1]);
            if ($substitute == "''") {
                $substitute = "";
            }
            $symbols = preg_split('//', $vals[0], -1, PREG_SPLIT_NO_EMPTY);
            $mappings[] = array($symbols, $substitute);
        }
        return $mappings;
    }

    /**
     * Converts UTF-8 text to ISO-8859-1. Uses mbstring when it is loaded
     * because utf8_decode() is deprecated in current PHP versions.
     */
    private function to_latin1($text)
    {
        if (function_exists("mb_convert_encoding")) {
            return mb_convert_encoding($text, "ISO-8859-1", "UTF-8");
        }
        return utf8_decode($text);
    }

    /**
     * Joins characters into the body of a bracket expression for the public
     * $pattern / $alpha_pattern properties. A trailing apostrophe is moved to
     * the front, as the original pattern builder did.
     *
     * The patterns are kept for callers that read them and as a "tables are
     * loaded" flag; matching itself is done by contains_strippable().
     */
    private function bracket_chars($chars, $trim_result)
    {
        $joined = implode("", $chars);
        if ($trim_result) {
            $joined = trim($joined);
        }
        if (substr($joined, -1, 1) == "'") {
            $joined = "'" . substr($joined, 0, -1);
        }
        return $joined;
    }

    /**
     * Replaces all symbols, then keeps replacing the first character while
     * it is a key of $first_chars.
     *
     * NOTE: a first-char mapping whose substitute starts with a character
     * that is itself a first-char key is applied again on the next pass; a
     * self-referential mapping (e.g. "a = a") therefore never terminates.
     */
    private function apply_tables($value, $symbols, $substitutes, $first_chars)
    {
        $stripped = str_replace($symbols, $substitutes, $value);
        $first_char = substr($stripped, 0, 1);
        while ($first_char !== "" && $first_char !== false && array_key_exists($first_char, $first_chars)) {
            $stripped = $first_chars[$first_char] . substr($stripped, 1);
            $first_char = substr($stripped, 0, 1);
        }
        return $stripped;
    }

    /**
     * Tells whether strip() could change $data: it contains one of the
     * symbols or starts with one of the first-chars.
     */
    private function contains_strippable($data)
    {
        if ($data === "") {
            return false;
        }
        foreach ($this->symbols as $symbol) {
            $symbol = (string) $symbol;
            if ($symbol !== "" && strpos($data, $symbol) !== false) {
                return true;
            }
        }
        return array_key_exists(substr($data, 0, 1), $this->first_chars);
    }

    /**
     * Escapes the three characters that must not appear literally in XML
     * text. "&" is handled first so the other replacements are not escaped
     * a second time.
     */
    private function escape_xml($data)
    {
        return str_replace(
            array("&", "<", ">"),
            array("&amp;", "&lt;", "&gt;"),
            $data
        );
    }

    /**
     * preg_replace_callback helper for unhtmlentities().
     */
    private function numeric_entity_to_char($matches)
    {
        return chr((int) $matches[1]);
    }

    /**
     * Creates a parser wired to this object's handlers.
     *
     * The parser is asked to deliver ISO-8859-1 so that the text matches the
     * single-byte symbol tables and the encoding declared in the output.
     */
    private function create_parser()
    {
        $parser = xml_parser_create();
        xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, "ISO-8859-1");
        xml_set_element_handler($parser, array($this, "startHandler"), array($this, "endHandler"));
        xml_set_character_data_handler($parser, array($this, "cDataHandler"));
        return $parser;
    }

    /**
     * Frees the parser where it is still a resource (PHP 5/7). In PHP 8 the
     * parser is an object that is released automatically.
     */
    private function release_parser()
    {
        if (is_resource($this->parser)) {
            xml_parser_free($this->parser);
        }
    }

    /**
     * Feeds a file to $this->parser line by line and then signals the end of
     * the document. Stops at the first parse error.
     *
     * @return bool true when the whole file was parsed without error
     */
    private function parse_file($xml)
    {
        $in = fopen($xml, "r");
        if ($in === false) {
            return false;
        }
        $ok = true;
        while ($ok && ($line = fgets($in)) !== false) {
            $ok = $this->feed_parser($line, false);
        }
        if ($ok) {
            $ok = $this->feed_parser("", true);
        }
        fclose($in);
        return $ok;
    }

    /**
     * Passes one chunk to the parser and reports a parse error as a warning
     * instead of ignoring it.
     */
    private function feed_parser($chunk, $is_final)
    {
        if (xml_parse($this->parser, $chunk, $is_final)) {
            return true;
        }
        trigger_error(
            sprintf(
                "db_modifier: XML error \"%s\" at line %d",
                xml_error_string(xml_get_error_code($this->parser)),
                xml_get_current_line_number($this->parser)
            ),
            E_USER_WARNING
        );
        return false;
    }
}

/*
Usage example (kept for reference, not executed).

NOTE: the make_*_table functions decode their input from UTF-8. When testing
from the command line with literals that are already ISO-8859-1, that
decoding must be bypassed.

set_time_limit(0);
$converter = new db_modifier();
$sort_fields = "lxa lxo";
$symbols = "á = a\né = e\ní = i\nó = o\nú = u\nÁ = A\nÉ = E\nÍ = I\nÓ = O\nÚ = U\nÑ = N\nñ = n\nü = u";
$first_chars = "-' = ''";
$alpha_symbols = "á = a";
$alpha_first_chars = "-' = ''";
$fields_not_to_strip = array("compound","disamb","enc_note","fl","fla","flao","flo","grm","grmx","irregv","lex_aff","mod","nae","ncol","nde","nfe","nse","nss","pea","peo","pl_com","plo","qry","qry_fon","qry_h","root_notes","se","sea","seao","sem","seo","spko","src","subadj");

$converter->get_fields("test.xml","refgroup",$symbols,$first_chars);
foreach($converter->strippable_fields as $field) {
    echo "$field \n";
}
$fields_to_strip = array_diff($converter->strippable_fields,$fields_not_to_strip);

$converter->modify("ActiveNahuatl_2005_august_final.xml","with_stripped.xml","refgroup",
    $sort_fields,$alpha_symbols,$alpha_first_chars,$symbols,$first_chars,
    $converter->fields,$fields_to_strip);
*/