Tryag File Manager
Home
-
Turbo Force
Current Path :
/
home
/
cluster1
/
data
/
bu01
/
1121861
/
jlex
/
alignment
/
Upload File :
New :
File
Dir
/home/cluster1/data/bu01/1121861/jlex/alignment/align.php
<?php
/**
 * Web front end for ClustalW multiple sequence alignments.
 *
 * Sequences are collected from the request, written to alignments/<name>.txt,
 * aligned with the external clustalw.exe program, and the resulting
 * alignments/<name>.aln is rendered as a colour-coded HTML table.
 *
 * The request is dispatched on the "function" parameter (POST first, GET as
 * fallback):
 *   realign   - re-align the rows ticked in the previously displayed result
 *   record    - append the experiment notes to ../alignments.xml
 *   stats     - pairwise statistics for the alignment kept in the session
 *   db_result - load and display a stored alignments/<file_name>.aln
 *   (other)   - align the sequences posted as sequence_0 .. sequence_N
 *
 * The code deliberately sticks to syntax that old PHP releases accept
 * (array(), no type declarations) while avoiding everything that was removed
 * in PHP 7/8 (ereg*, arithmetic on non-numeric strings).
 */

set_time_limit(1000);
// Keep notices and warnings off the generated page.
error_reporting(0);

/**
 * Reads a scalar request parameter.
 *
 * @param array  $source  Usually $_POST or $_GET.
 * @param string $key     Parameter name.
 * @param mixed  $default Returned when the parameter is missing or is an array.
 * @return mixed
 */
function request_value($source, $key, $default = "")
{
    if (!is_array($source) || !isset($source[$key]) || is_array($source[$key])) {
        return $default;
    }
    return $source[$key];
}

/**
 * Returns the array stored under $key in the session, or an empty array.
 */
function session_array($key)
{
    if (isset($_SESSION[$key]) && is_array($_SESSION[$key])) {
        return $_SESSION[$key];
    }
    return array();
}

/**
 * Escapes a value for use in HTML/XML text or inside a quoted attribute.
 */
function html_escape($value)
{
    $flags = ENT_QUOTES;
    if (defined("ENT_SUBSTITUTE")) {
        // Replace invalid byte sequences instead of returning an empty string.
        $flags = $flags | ENT_SUBSTITUTE;
    }
    return htmlspecialchars((string) $value, $flags);
}

/**
 * Reduces a user supplied file name to characters that are safe both as a
 * path component and as a shell argument (letters, digits, "_", "." and "-").
 * Path separators are removed, so the result can never leave alignments/.
 *
 * @return string The cleaned name, or "alignment" when nothing is left.
 */
function safe_file_name($file_name)
{
    $clean = preg_replace('/[^A-Za-z0-9_.-]/', "", (string) $file_name);
    if ($clean === "" || $clean === null) {
        return "alignment";
    }
    return $clean;
}

/**
 * True for the pseudo sequence that holds the ClustalW conservation line.
 * The comparison is done on strings because numeric sequence names become
 * integer array keys, and on old PHP versions 0 == "identity" is true.
 */
function is_identity_row($name)
{
    return ((string) $name === "identity");
}

/**
 * Heuristic shared by the BLAST link and the colouring: a sequence is treated
 * as protein when more than a quarter of its non-gap characters are one of
 * D E F H I K L M N P Q R S.
 */
function looks_like_protein($seq)
{
    $seq = str_replace("-", "", (string) $seq);
    $remaining = preg_replace('/[DEFHIKLMNPQRS]/', "", $seq);
    return ((0.75 * strlen($seq)) > strlen($remaining));
}

/**
 * Returns the reverse complement of a nucleotide sequence in upper case.
 *
 * IUPAC ambiguity codes are complemented as well; any other character
 * (N, S, W, gaps, blanks) is kept, so the result always has the same length
 * as the input.
 */
function reverse_complement($s)
{
    $s = strtoupper((string) $s);
    return strrev(strtr($s, "ACGTURYKMBDHV", "TGCAAYRMKVHDB"));
}

/**
 * Translates a nucleotide sequence with the standard genetic code.
 *
 * @param string $s     Nucleotides; case is ignored and U is read as T.
 * @param int    $frame 1-based reading frame; values below 1 are read as 1.
 * @return string One letter per codon, "*" for stop codons and "?" for
 *                codons that are unknown or incomplete.
 */
function translate($s, $frame)
{
    $aas = array(
        "TTT"=>"F","TTC"=>"F","TTA"=>"L","TTG"=>"L","TCT"=>"S","TCC"=>"S",
        "TCA"=>"S","TCG"=>"S","TAT"=>"Y","TAC"=>"Y","TAA"=>"*","TAG"=>"*",
        "TGT"=>"C","TGC"=>"C","TGA"=>"*","TGG"=>"W","CTT"=>"L","CTC"=>"L",
        "CTA"=>"L","CTG"=>"L","CCT"=>"P","CCC"=>"P","CCA"=>"P","CCG"=>"P",
        "CAT"=>"H","CAC"=>"H","CAA"=>"Q","CAG"=>"Q","CGT"=>"R","CGC"=>"R",
        "CGA"=>"R","CGG"=>"R","ATT"=>"I","ATC"=>"I","ATA"=>"I","ATG"=>"M",
        "ACT"=>"T","ACC"=>"T","ACA"=>"T","ACG"=>"T","AAT"=>"N","AAC"=>"N",
        "AAA"=>"K","AAG"=>"K","AGT"=>"S","AGC"=>"S","AGA"=>"R","AGG"=>"R",
        "GTT"=>"V","GTC"=>"V","GTA"=>"V","GTG"=>"V","GCT"=>"A","GCC"=>"A",
        "GCA"=>"A","GCG"=>"A","GAT"=>"D","GAC"=>"D","GAA"=>"E","GAG"=>"E",
        "GGT"=>"G","GGC"=>"G","GGA"=>"G","GGG"=>"G"
    );

    $frame = (int) $frame;
    if ($frame < 1) {
        // A frame of 0 or less would start reading from the end of the string.
        $frame = 1;
    }

    $s = strtr(strtoupper((string) $s), "U", "T");
    $length = strlen($s);
    $aa_sequence = "";
    for ($i = $frame - 1; $i < $length; $i += 3) {
        $codon = substr($s, $i, 3);
        $aa_sequence .= isset($aas[$codon]) ? $aas[$codon] : "?";
    }
    return $aa_sequence;
}

/**
 * Applies the optional reverse complement and translation steps, in that order.
 */
function transform_sequence($s, $rc, $translate, $frame)
{
    if ($rc) {
        $s = reverse_complement($s);
    }
    if ($translate) {
        $s = translate($s, $frame);
    }
    return $s;
}

/**
 * Adds one complete FASTA record to $seqs and returns the updated array.
 *
 * The transformations are applied to the whole record: reverse complementing
 * or translating line by line would scramble multi-line sequences. Records
 * that share a name are concatenated.
 */
function store_sequence($seqs, $name, $residues, $rc, $translate, $frame)
{
    if ($residues === "") {
        return $seqs;
    }
    if ($rc) {
        $name .= "_RC";
    }
    $residues = transform_sequence($residues, $rc, $translate, $frame);
    $seqs[$name] = (isset($seqs[$name]) ? $seqs[$name] : "") . $residues;
    return $seqs;
}

/**
 * Builds the name => sequence map from the posted form.
 *
 * Reads analysis_type, translate, frame, num_results and, for every index
 * 0..num_results, the fields sequence_<i> (FASTA text) and rc_<i>.
 * With analysis_type "words" every distinct blank-separated word becomes its
 * own sequence named "<word>_1"; there the transformations are applied per
 * line, as before. Records without a ">" header are named "sequence_<i>".
 *
 * @return array
 */
function get_sequences()
{
    $seqs = array();
    $words_mode = (request_value($_POST, "analysis_type") == "words");
    $translate = (request_value($_POST, "translate") == "on");
    $frame = request_value($_POST, "frame", 1);
    // There cannot be more sequence fields than posted fields.
    $num_results = min((int) request_value($_POST, "num_results", 0), count($_POST));

    for ($i = 0; $i <= $num_results; $i++) {
        $rc = (request_value($_POST, "rc_" . $i) == "on");
        $text = trim(request_value($_POST, "sequence_" . $i));
        if ($text === "") {
            continue;
        }

        $name = "sequence_" . $i;
        $residues = "";
        foreach (preg_split('/\r\n|\r|\n/', $text) as $line) {
            $line = trim($line);
            if ($line === "") {
                continue;
            }
            if ($line[0] == ">") {
                // A header closes the record collected so far.
                $seqs = store_sequence($seqs, $name, $residues, $rc, $translate, $frame);
                $name = trim((string) substr($line, 1));
                $residues = "";
                continue;
            }

            $line = str_replace("*", "", $line);
            if ($words_mode) {
                $line = transform_sequence($line, $rc, $translate, $frame);
                foreach (explode(" ", $line) as $word) {
                    $key = $word . "_1";
                    if ($word !== "" && !isset($seqs[$key])) {
                        $seqs[$key] = $word;
                    }
                }
            } else {
                $residues .= $line;
            }
        }
        $seqs = store_sequence($seqs, $name, $residues, $rc, $translate, $frame);
    }
    return $seqs;
}

/**
 * Removes the gap character "-" from every sequence.
 *
 * @param array $aligned_seqs name => aligned sequence
 * @return array name => ungapped sequence
 */
function aligned_to_unaligned($aligned_seqs)
{
    $seqs = array();
    foreach ($aligned_seqs as $name => $seq) {
        $seqs[$name] = str_replace("-", "", $seq);
    }
    return $seqs;
}

/**
 * Parses a ClustalW .aln file.
 *
 * Sequence rows start in the first column ("name   RESIDUES"); the
 * conservation row of each block starts with white space and is collected
 * under the key "identity", padded to the width of the block so that all
 * rows keep the same length.
 *
 * @param string $file_name Path of the .aln file.
 * @return array name => aligned sequence; empty when the file cannot be read.
 */
function load_clustal_file($file_name)
{
    $seqs = array();
    $in = fopen($file_name, "r");
    if (!$in) {
        return $seqs;
    }

    // Skip the "CLUSTAL ..." header line and the line that follows it.
    fgets($in);
    fgets($in);

    $seq_offset = 0; // column at which the residues of the current block start
    $num_cols = 0;   // number of alignment columns in the current block
    while (($raw = fgets($in)) !== false) {
        $line = rtrim($raw, "\r\n");
        if ($line === "") {
            continue;
        }

        if ($line[0] == " " || $line[0] == "\t") {
            if ($num_cols == 0) {
                continue; // no sequence row seen yet, nothing to line up with
            }
            $marks = (string) substr($line, $seq_offset, $num_cols);
            if (!isset($seqs["identity"])) {
                $seqs["identity"] = "";
            }
            $seqs["identity"] .= str_pad($marks, $num_cols);
            continue;
        }

        // The last blank-separated token is the sequence, the rest is the name.
        if (!preg_match('/^(.*\S)\s+(\S+)\s*$/', $line, $parts)) {
            continue;
        }
        $name = $parts[1];
        $seq = $parts[2];
        $num_cols = strlen($seq);
        $seq_offset = strlen(rtrim($line)) - $num_cols;
        $seqs[$name] = (isset($seqs[$name]) ? $seqs[$name] : "") . $seq;
    }
    fclose($in);
    return $seqs;
}

/**
 * Writes the sequences as FASTA to alignments/<file_name>.txt, runs ClustalW
 * on that file and loads the resulting alignments/<file_name>.aln.
 *
 * @param array  $sequences name => sequence
 * @param string $file_name Base name; cleaned with safe_file_name().
 * @return array Aligned sequences, empty when the alignment could not be made.
 */
function align_sequences($sequences, $file_name)
{
    $base = "alignments/" . safe_file_name($file_name);
    $input = $base . ".txt";
    $result = $base . ".aln";

    $out = fopen($input, "w");
    if (!$out) {
        return array();
    }
    foreach ($sequences as $name => $sequence) {
        fwrite($out, ">" . $name . "\n" . $sequence . "\n");
    }
    fclose($out);

    // Never present the result of an earlier run if ClustalW fails this time.
    if (file_exists($result)) {
        unlink($result);
    }

    exec("clustalw.exe " . escapeshellarg($input));
    return load_clustal_file($result);
}

/**
 * Computes pairwise statistics for an alignment.
 *
 * For every ordered pair (a, b) of sequences other than "identity" the result
 * holds $stats[a][b] with the keys
 *   length         - number of columns of a
 *   num_same       - columns in which a and b carry the same character,
 *                    gaps included
 *   num_identities - columns in which a and b carry the same residue
 *   num_bps        - number of residues (non-gap characters) of a
 * The entries $stats[a][a] carry "-" for every key.
 *
 * @param array $seqs name => aligned sequence
 * @return array
 */
function get_stats($seqs)
{
    $stats = array();
    foreach ($seqs as $cur_name => $cur_seq) {
        if (is_identity_row($cur_name)) {
            continue;
        }
        $length = strlen($cur_seq);

        foreach ($seqs as $name => $seq) {
            if (is_identity_row($name) || isset($stats[$cur_name][$name])) {
                continue;
            }
            if ((string) $cur_name === (string) $name) {
                $stats[$cur_name][$name] = array(
                    "length" => "-",
                    "num_same" => "-",
                    "num_bps" => "-",
                    "num_identities" => "-"
                );
                continue;
            }

            $other_length = strlen($seq);
            $num_same = 0;
            $num_identities = 0;
            $cur_seq_num_bps = 0;
            $seq_num_bps = 0;
            for ($i = 0; $i < $length; $i++) {
                $a = $cur_seq[$i];
                // Columns beyond the end of the other sequence count as absent.
                $b = ($i < $other_length) ? $seq[$i] : "";
                if ($a === $b) {
                    $num_same++;
                    if ($a != "-") {
                        $num_identities++;
                    }
                }
                if ($a != "-") {
                    $cur_seq_num_bps++;
                }
                if ($b !== "" && $b != "-") {
                    $seq_num_bps++;
                }
            }

            $stats[$cur_name][$name] = array(
                "length" => $length,
                "num_same" => $num_same,
                "num_bps" => $cur_seq_num_bps,
                "num_identities" => $num_identities
            );
            // The mirrored entry only differs in the residue count.
            $stats[$name][$cur_name] = array(
                "length" => $length,
                "num_same" => $num_same,
                "num_bps" => $seq_num_bps,
                "num_identities" => $num_identities
            );
        }
    }
    return $stats;
}

/**
 * Returns $part as a percentage of $whole rounded to one decimal, 0 when
 * $whole is zero.
 */
function percentage($part, $whole)
{
    if ($whole == 0) {
        return 0;
    }
    return round(($part / $whole) * 100, 1);
}

/**
 * Prints the table produced by get_stats(). Every cell shows
 * "same / length" and "identities / residues" with their percentages;
 * the diagonal shows "-".
 */
function display_stats($stats)
{
    $cols = "<tr><th></th>";
    $fields = "";
    foreach ($stats as $name => $stat_set) {
        $label = html_escape($name);
        $cols .= "<th>$label</th>";
        $fields .= "<tr><th>$label</th>";
        foreach ($stat_set as $nums) {
            if (!is_numeric($nums["length"])) {
                // Diagonal marker: there is nothing to divide.
                $fields .= "<td align='center'>-</td>";
                continue;
            }
            $length = $nums["length"];
            $num_same = $nums["num_same"];
            $num_bps = $nums["num_bps"];
            $num_identities = $nums["num_identities"];
            $percent = percentage($num_same, $length);
            $identity = percentage($num_identities, $num_bps);
            $fields .= "<td align='center'>$percent% : $num_same / $length<BR> $identity% : $num_identities / $num_bps</td>";
        }
        $fields .= "</tr>";
    }
    $cols .= "</tr>";
    echo "<html><table border='1'>$cols $fields </table>";
}

/**
 * Builds the "Blast!" link for a sequence. Gaps are removed and blastp or
 * blastn is chosen with looks_like_protein().
 *
 * @return string HTML anchor element.
 */
function make_blast_link($seq)
{
    $query = str_replace("-", "", (string) $seq);
    if (looks_like_protein($query)) {
        $program = "blastp";
        $page = "Proteins";
    } else {
        $program = "blastn";
        $page = "Nucleotides";
    }
    $url = "http://www.ncbi.nih.gov/BLAST/Blast.cgi?CMD=Web&AUTO_FORMAT=Semiauto&ALIGNMENTS=50&ALIGNMENT_VIEW=Pairwise&CLIENT=web&DATABASE=nr&DESCRIPTIONS=100&QUERY="
        . rawurlencode($query)
        . "&EXPECT=10&FILTER=L&FORMAT_OBJECT=Alignment&FORMAT_TYPE=HTML&NCBI_GI=on&PAGE=" . $page
        . "&PROGRAM=" . $program
        . "&SERVICE=plain&SET_DEFAULTS.x=34&SET_DEFAULTS.y=8&SHOW_OVERVIEW=on&END_OF_HTTPGET=Yes&SHOW_LINKOUT=yes&GET_SEQUENCE=yes";
    return "<a href='" . html_escape($url) . "' target='_blast'>Blast!</a>";
}

/**
 * Prints the link to the statistics report and the form used to record the
 * experiment (date, purpose, conclusions, plans) for $file_name.
 */
function display_report_form($file_name)
{
    $file_value = html_escape($file_name);
    echo "<a href='align.php4?function=stats' target='_stats'>Display Statistics Report</a><BR>";
    echo "<br><br>";
    echo "<H2>Record Alignment Result</H2>";
    echo "<form action='align.php4' method='post'>";
    echo "<input type='hidden' name='function' value='record'/>";
    echo "<input type='hidden' name='file_name' value='$file_value'/>";
    echo "<table width='800'>";
    echo "<tr><th align='left' width='50'>Date:</th><td><input type='text' name='date' value='" . date("Y-m-d") . "'/>(YYYY-MM-DD)</td></tr>";
    echo "<tr><th colspan='2' align='left'>Purpose:</th></tr>";
    echo "<tr><td colspan='2'><textarea name='purpose' rows='2' cols='60'></textarea></td></tr>";
    echo "<tr><th colspan='2' align='left'>Conclusions:</th></tr>";
    echo "<tr><td colspan='2'><textarea name='conclusions' cols='80' rows='6'></textarea></td></tr>";
    echo "<tr><th colspan='2' align='left'>Plans:</th></tr>";
    echo "<tr><td colspan='2'><textarea name='plans' cols='80' rows='6'></textarea></td></tr>";
    echo "<tr><td colspan='2' align='center'><input type='submit' value='Record'></td></tr>";
    echo "</table></form>";
}

/**
 * Renders one table cell per character of $residues, coloured through
 * $colors. Characters without an entry are shown on white.
 *
 * @return string HTML for the cells.
 */
function residue_cells($residues, $colors, $font_size)
{
    $font = html_escape($font_size);
    $cells = "";
    $length = strlen($residues);
    for ($i = 0; $i < $length; $i++) {
        $residue = $residues[$i];
        $color = isset($colors[$residue]) ? $colors[$residue] : "white";
        $cells .= "<td bgcolor='$color'><font style=\"font-size: $font\">" . html_escape($residue) . "</font></td>";
    }
    return $cells;
}

/**
 * Prints the alignment with one table row per sequence, inside the form that
 * lets the user pick rows to re-align, reverse complement or translate.
 *
 * @param array  $seqs      name => aligned sequence (may contain "identity")
 * @param string $font_size CSS font size used for residues and ruler.
 */
function display_alignment_horizontal($seqs, $font_size)
{
    $dna_colors = array("A"=>"red","G"=>"orange","C"=>"blue","T"=>"green","-"=>"white","*"=>"white"," "=>"white");
    $aa_colors = array(
        "A"=>"NAVY","C"=>"BLUE",
        "D"=>"OLIVE","E"=>"MAROON",
        "F"=>"RED","G"=>"LIME",
        "H"=>"GOLD","I"=>"MAGENTA",
        "K"=>"CC99CC","L"=>"ORANGE",
        "M"=>"GOLD","N"=>"990033",
        "P"=>"GRAY","Q"=>"TAN",
        "R"=>"SILVER","S"=>"YELLOW",
        "T"=>"336633","V"=>"PINK",
        "W"=>"PURPLE","Y"=>"GREEN",
        "-"=>"white","*"=>"white"
    );
    $font = html_escape($font_size);

    $width = 0;
    foreach ($seqs as $seq) {
        $width = max($width, strlen($seq));
    }

    // Ruler row: four empty cells for the Blast/Realign/RC/Name columns,
    // then a label above every 20th alignment column.
    $offsets = "<tr><td> </td><td> </td><td> </td><td> </td>";
    for ($i = 1; $i <= $width; $i++) {
        if (($i % 20) == 0) {
            $offsets .= "<td style=\"font-size: $font\">" . $i . "</td>";
        } else {
            $offsets .= "<td> </td>";
        }
    }
    $offsets .= "</tr>";

    echo "<form action='align.php4' method='post'>";
    echo "<input type='hidden' name='function' value='realign'/>";
    echo "<input type='hidden' name='num_results' value='" . count($seqs) . "'/>";
    echo "Output Filename: <input type='text' size='30' name='file_name'/><br/>";
    echo "Font Size: <input type='text' size='10' name='font_size' value='8px'/>";
    echo "<table>";
    echo "<tr><th>Blast</th><th>Realign</th><th>RC</th><th>Name</th></tr>";
    echo $offsets;

    $row_num = 1;
    foreach ($seqs as $name => $seq) {
        $colors = looks_like_protein($seq) ? $aa_colors : $dna_colors;
        $label = html_escape($name);
        echo "<tr><td>" . make_blast_link($seq) . "</td>";
        echo "<td><input type='checkbox' name='realign_" . $row_num . "' value='on'/></td>";
        echo "<td><input type='checkbox' name='rc_" . $row_num . "' value='on'/></td>";
        echo "<td nowrap>$label<input type='hidden' name='name_" . $row_num . "' value='$label'/></td>";
        echo residue_cells($seq, $colors, $font_size);
        echo "</tr>";
        $row_num++;
    }

    echo $offsets;
    echo "</table>";
    echo "Translate Sequences: <input type='checkbox' name='translate' value='on'/>";
    echo "Frame: <select name='frame'><option>1</option><option>2</option><option>3</option></select><BR/>";
    echo "<input type='submit'/>";
    echo "</form>";
}

/**
 * Prints the alignment in blocks of $cols columns. Every block starts with a
 * ruler row labelling each 20th alignment position, followed by one row per
 * sequence; a final, shorter block is printed as well.
 *
 * @param array  $seqs      name => aligned sequence
 * @param mixed  $cols      Columns per block; 60 when empty or not positive.
 * @param string $font_size CSS font size used for the residues.
 */
function display_alignment_vertical($seqs, $cols, $font_size)
{
    $colors = array("A"=>"red","G"=>"orange","C"=>"blue","T"=>"green","-"=>"white","*"=>"white");
    $cols = (int) $cols;
    if ($cols < 1) {
        $cols = 60;
    }

    $width = 0;
    foreach ($seqs as $seq) {
        $width = max($width, strlen($seq));
    }

    echo "<form action='align.php4' method='post'>";
    echo "<input type='hidden' name='function' value='realign'/>";
    echo "<table>";
    for ($start = 0; $start < $width; $start += $cols) {
        // The first cell of the ruler sits above the name column.
        $ruler = "<tr><td> </td>";
        for ($c = 1; $c <= $cols; $c++) {
            $position = $start + $c;
            if (($position % 20) == 0) {
                $ruler .= "<td>$position</td>";
            } else {
                $ruler .= "<td> </td>";
            }
        }
        echo $ruler . "</tr>";

        foreach ($seqs as $name => $seq) {
            $chunk = (string) substr($seq, $start, $cols);
            echo "<tr><td>" . html_escape($name) . "</td>" . residue_cells($chunk, $colors, $font_size) . "</tr>";
        }
    }
    echo "</table>";
    echo "</form>";
}

/**
 * Appends the posted experiment notes (date, purpose, conclusions, plans,
 * file_name) and the names of the aligned sequences to ../alignments.xml.
 * All values are escaped so that the record stays well-formed.
 *
 * NOTE: This writes to an unclosed xml file. It must be closed upon adding
 * it to MySQL.
 *
 * Sequence names are split at the first "-" into strain and description.
 * NOTE(review): a name without "-" is stored entirely as the strain with an
 * empty description - confirm that this matches the naming convention.
 *
 * @param array $aligned_seqs name => aligned sequence
 * @return bool False when the file could not be opened.
 */
function record_experiment($aligned_seqs)
{
    $date = html_escape(request_value($_POST, "date"));
    $purpose = html_escape(request_value($_POST, "purpose"));
    $conclusions = html_escape(request_value($_POST, "conclusions"));
    $plans = html_escape(request_value($_POST, "plans"));
    $file_name = strtolower(safe_file_name(request_value($_POST, "file_name")));
    if (!is_array($aligned_seqs)) {
        $aligned_seqs = array();
    }

    $out = fopen("../alignments.xml", "a");
    if (!$out) {
        return false;
    }

    fwrite($out, "<!--*******************************************************-->\n");
    fwrite($out, "<alignment>\n");
    fwrite($out, "<date>$date</date>\n");
    fwrite($out, "<purpose>$purpose</purpose>\n");
    fwrite($out, "<file>$file_name</file>\n");
    foreach ($aligned_seqs as $name => $seq) {
        if (is_identity_row($name)) {
            continue;
        }
        $name = (string) $name;
        $dash = strpos($name, "-");
        if ($dash === false) {
            $strain = $name;
            $description = "";
        } else {
            $strain = substr($name, 0, $dash);
            $description = (string) substr($name, $dash + 1);
        }
        fwrite($out, "<sequence>\n");
        fwrite($out, "<strain>" . html_escape($strain) . "</strain>\n");
        fwrite($out, "<description>" . html_escape($description) . "</description>\n");
        fwrite($out, "</sequence>\n");
    }
    fwrite($out, "<conclusions>$conclusions</conclusions>\n");
    fwrite($out, "<plans>$plans</plans>\n");
    fwrite($out, "</alignment>\n");
    fclose($out);
    return true;
}

/**
 * Prints the alignment and the report form, or a short message when there
 * is nothing to show.
 */
function show_alignment($aligned_seqs, $file_name, $font_size)
{
    if (count($aligned_seqs) == 0) {
        echo "No alignment is available to display.<BR>";
        return;
    }
    display_alignment_horizontal($aligned_seqs, $font_size);
    display_report_form($file_name);
}

/**
 * Aligns $seqs, keeps the result in the session and displays it.
 * ClustalW is only started when there are at least two sequences.
 */
function run_alignment($seqs, $file_name, $font_size)
{
    $aligned_seqs = array();
    if (count($seqs) < 2) {
        echo "At least two sequences are needed to build an alignment.<BR>";
    } else {
        $aligned_seqs = align_sequences($seqs, $file_name);
    }
    $_SESSION["aligned_seqs"] = $aligned_seqs;
    show_alignment($aligned_seqs, $file_name, $font_size);
}

session_start();

$font_size = request_value($_POST, "font_size");
if ($font_size === "") {
    $font_size = "8px"; // same default as the font size field of the form
}
$function = request_value($_POST, "function");
if ($function == "") {
    $function = request_value($_GET, "function");
}

if ($function == "realign") {
    if (!isset($_SESSION["seqs"])) {
        die("No sequences to realign.<BR>");
    }
    $file_name = safe_file_name(request_value($_POST, "file_name"));
    $translate = (trim(request_value($_POST, "translate")) == "on");
    $frame = request_value($_POST, "frame", 1);
    $old_seqs = session_array("seqs");
    $num_sequences = min((int) request_value($_POST, "num_results", 0), count($_POST));

    $seqs = array();
    for ($i = 1; $i <= $num_sequences; $i++) {
        if (request_value($_POST, "realign_" . $i) != "on") {
            continue;
        }
        $name = request_value($_POST, "name_" . $i);
        // Only real, known sequences can be re-aligned; the conservation
        // row is display data.
        if (is_identity_row($name) || !isset($old_seqs[$name])) {
            continue;
        }
        $rc = (request_value($_POST, "rc_" . $i) == "on");
        $seqs[$name] = transform_sequence($old_seqs[$name], $rc, $translate, $frame);
    }
    run_alignment($seqs, $file_name, $font_size);
} else if ($function == "record") {
    if (record_experiment(session_array("aligned_seqs"))) {
        echo "Thank you for recording your data.<BR/>";
    } else {
        echo "The alignment record could not be written.<BR/>";
    }
    echo "<a href='../add_database.php4?file=alignments.xml&head_tag=alignment&mode=1'>Add Database Now</a><BR/>";
    echo "<a href='../alignment/alignment_database.xml'>Alignment Database</a><BR/>";
    echo "<a href='../igrs/igr_database.xml'>IGR Database</a><BR/>";
    echo "<a href='../rdloci/rdloci_database.xml'>RD Loci Database</a><BR/>";
} else if ($function == "stats") {
    display_stats(get_stats(session_array("aligned_seqs")));
} else if ($function == "db_result") {
    $file_name = safe_file_name(request_value($_GET, "file_name"));
    $aligned_seqs = load_clustal_file("alignments/" . $file_name . ".aln");
    $_SESSION["seqs"] = aligned_to_unaligned($aligned_seqs);
    $_SESSION["aligned_seqs"] = $aligned_seqs;
    show_alignment($aligned_seqs, $file_name, $font_size);
} else {
    $file_name = safe_file_name(request_value($_POST, "file_name"));
    $seqs = get_sequences();
    $_SESSION["seqs"] = $seqs;
    run_alignment($seqs, $file_name, $font_size);
}