Changeset 57 for trunk/WordPress/plugin/transposh/parser.php
- Timestamp:
- 03/03/2009 10:34:39 AM (3 years ago)
- File:
-
- 1 edited
-
trunk/WordPress/plugin/transposh/parser.php (modified) (18 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/WordPress/plugin/transposh/parser.php
r50 r57 21 21 * fetch translation and update the translated page. 22 22 * This file should include only general purpose parser functionality while using callbacks 23 * to obtain WorkdPress specific capabilities, e.g. db access. 23 * to obtain WorkdPress specific capabilities, e.g. db access. 24 24 */ 25 25 … … 27 27 require_once("constants.php"); 28 28 29 //The language to which the current page will be translated to. 29 //The language to which the current page will be translated to. 30 30 $lang; 31 31 32 32 //The html page which starts contains the content being translated 33 33 $page; 34 34 35 35 //Marks the current position of the translation process within the page 36 36 $pos = 0; … … 39 39 $tags_list = array(); 40 40 41 //The translated html page 41 //The translated html page 42 42 $tr_page; 43 44 //Points to the last character that have been copied from the original to the translated page. 43 44 //Points to the last character that have been copied from the original to the translated page. 45 45 $tr_mark = 0; 46 46 47 //Is the current use is in edit mode. 47 //Is the current use is in edit mode. 48 48 $is_edit_mode = FALSE; 49 49 … … 56 56 /* 57 57 * Parse the html page into tags, identify translateable string which 58 * will be translated. 58 * will be translated. 59 59 */ 60 60 function process_html() 61 61 { 62 logger("Enter " . __METHOD__, 4); 63 64 global $page, $tr_page, $pos, $tags_list, $lang; 65 $no_translate = 0; 66 $page_length = strlen($page); 67 68 while($pos < $page_length) 69 { 70 //find beginning of next tag 71 $pos = strpos($page, '<', $pos); 72 if($pos === FALSE) 73 { 74 //logger("Error finding < in pos " . $pos . " page: " . $page); 75 break; 76 } 77 $pos++; 78 79 //Get the element identifying this tag 80 $element = get_element(); 81 82 if(should_skip_element($element)) 83 { 84 //do nothing 85 } 86 else 87 { 88 //Mark tag start position 89 $tag_start = $pos; 90 91 //skip to the '>' marking the end of the element 92 $pos = strpos($page, '>', $pos); 93 94 //Mark tag end position 95 $tag_end = $pos; 96 97 if($page[$pos-1] == '/') 98 { 99 //single line tag - no need to update tags list 100 process_tag_init($element, $tag_start, $tag_end); 101 } 102 else if($element[0] != '/') 103 { 104 if(!$no_translate) 105 { 106 process_tag_init($element, $tag_start, $tag_end); 107 } 108 109 $tags_list[] = $element; 110 111 //Look for the no translate class 112 if(stripos($element, NO_TRANSLATE_CLASS) !== FALSE) 113 { 114 $no_translate++; 115 } 116 } 117 else 118 { 119 $popped_element = array_pop($tags_list); 120 if(!$no_translate) 121 { 122 process_tag_termination($element); 123 } 124 125 //Look for the no translate class 126 if(stripos($popped_element, NO_TRANSLATE_CLASS) !== FALSE) 127 { 128 $no_translate--; 129 } 130 } 131 132 $pos++; 133 134 //skip processing while enclosed within a tag marked by no_translate 135 if(!$no_translate) 136 { 137 process_current_tag(); 138 } 139 140 } 141 } 142 143 if(strlen($tr_page) > 0) 144 { 145 //Some translation has been taken place. Complete the translated 146 //page up to the full contents of the original page. 147 update_translated_page(strlen($page), -1, ""); 148 } 149 150 logger("Exit " . __METHOD__, 4); 62 logger("Enter " . __METHOD__, 4); 63 64 global $page, $tr_page, $pos, $tags_list, $lang; 65 $no_translate = 0; 66 $page_length = strlen($page); 67 68 while($pos < $page_length) 69 { 70 //find beginning of next tag 71 $pos = strpos($page, '<', $pos); 72 if($pos === FALSE) 73 { 74 //logger("Error finding < in pos " . $pos . " page: " . $page); 75 break; 76 } 77 $pos++; 78 79 //Get the element identifying this tag 80 $element = get_element(); 81 82 if(should_skip_element($element)) 83 { 84 logger ("skipping element: $element"); 85 //do nothing 86 } 87 else 88 { 89 //Mark tag start position 90 $tag_start = $pos; 91 92 //skip to the '>' marking the end of the element 93 if ($element == "!") { 94 $pos = strpos($page, '[', $pos); 95 }else { 96 $pos = strpos($page, '>', $pos); 97 } 98 99 //Mark tag end position 100 $tag_end = $pos; 101 102 if($page[$pos-1] == '/') 103 { 104 //single line tag - no need to update tags list 105 process_tag_init($element, $tag_start, $tag_end); 106 } 107 else if($element[0] != '/') 108 { 109 if(!$no_translate) 110 { 111 process_tag_init($element, $tag_start, $tag_end); 112 } 113 114 $tags_list[] = $element; 115 116 //Look for the no translate class 117 if(stripos($element, NO_TRANSLATE_CLASS) !== FALSE) 118 { 119 $no_translate++; 120 } 121 } 122 else 123 { 124 $popped_element = array_pop($tags_list); 125 if(!$no_translate) 126 { 127 process_tag_termination($element); 128 } 129 130 //Look for the no translate class 131 if(stripos($popped_element, NO_TRANSLATE_CLASS) !== FALSE) 132 { 133 $no_translate--; 134 } 135 } 136 137 $pos++; 138 139 //skip processing while enclosed within a tag marked by no_translate 140 if(!$no_translate) 141 { 142 process_current_tag(); 143 } 144 145 } 146 } 147 148 if(strlen($tr_page) > 0) 149 { 150 //Some translation has been taken place. Complete the translated 151 //page up to the full contents of the original page. 152 update_translated_page(strlen($page), -1, ""); 153 } 154 155 logger("Exit " . __METHOD__, 4); 151 156 } 152 157 … … 155 160 * Determine if the specified element should be skipped. If so the position 156 161 * is moved past end of tag. 157 * Return TRUE if element is skipped otherwise FALSE. 162 * Return TRUE if element is skipped otherwise FALSE. 158 163 */ 159 164 function should_skip_element(&$element) 160 165 { 161 global $page, $pos;162 $rc = TRUE;163 164 if(strncmp($element, "!DOCTYPE", 8) == 0)165 {166 $pos = strpos($page, '>', $pos);167 }168 else if(strncmp($element, "!--", 3) == 0)169 {170 $pos = strpos($page, '-->', $pos);171 }172 else173 {174 $rc = FALSE;175 }176 177 return $rc;166 global $page, $pos; 167 $rc = TRUE; 168 169 if(strncmp($element, "!DOCTYPE", 8) == 0) 170 { 171 $pos = strpos($page, '>', $pos); 172 } 173 else if(strncmp($element, "!--", 3) == 0) 174 { 175 $pos = strpos($page, '-->', $pos); 176 } 177 else 178 { 179 $rc = FALSE; 180 } 181 182 return $rc; 178 183 } 179 184 … … 184 189 function process_tag_init(&$element, $start, $end) 185 190 { 186 switch ($element) 187 { 188 case 'a': 189 process_anchor_tag($start, $end); 190 break; 191 case 'div' : 192 case 'span': 193 process_span_or_div_tag($element, $start, $end); 194 break; 195 case 'html': 196 process_html_tag($start, $end); 197 break; 198 case 'body': 199 global $is_in_body; 200 $is_in_body = TRUE; 201 break; 202 203 204 } 205 191 switch ($element) 192 { 193 case 'a': 194 process_anchor_tag($start, $end); 195 break; 196 case 'div' : 197 case 'span': 198 //case 'description': 199 //case 'content:encoded': 200 logger("in case : $element",1); 201 process_span_or_div_tag($element, $start, $end); 202 break; 203 case 'html': 204 process_html_tag($start, $end); 205 break; 206 case 'body': 207 case 'channel': 208 global $is_in_body; 209 $is_in_body = TRUE; 210 break; 211 } 212 206 213 } 207 214 … … 214 221 function process_span_or_div_tag(&$element, $start, $end) 215 222 { 216 217 $cls = get_attribute($start, $end, 'class');218 219 if($cls == NULL)220 {221 return;222 }223 224 //Look for the no translate class 225 if(stripos($cls, NO_TRANSLATE_CLASS) === FALSE)226 {227 return;228 }229 230 //Mark the element as not translatable231 $element .= "." . NO_TRANSLATE_CLASS;232 } 233 234 235 /* 236 * Process html tag. Set the direction for rtl languages. 223 224 $cls = get_attribute($start, $end, 'class'); 225 226 if($cls == NULL) 227 { 228 return; 229 } 230 231 //Look for the no translate class 232 if(stripos($cls, NO_TRANSLATE_CLASS) === FALSE) 233 { 234 return; 235 } 236 237 //Mark the element as not translatable 238 $element .= "." . NO_TRANSLATE_CLASS; 239 } 240 241 242 /* 243 * Process html tag. Set the direction for rtl languages. 237 244 * 238 245 */ 239 246 function process_html_tag($start, $end) 240 247 { 241 global $lang, $rtl_languages;242 243 if(!(in_array ($lang, $rtl_languages)))244 {245 return;246 }247 248 $dir = get_attribute($start, $end, 'dir');249 250 if($dir == NULL)251 {252 253 //attribute does not exist - add it254 update_translated_page($end, -1, 'dir="rtl"');255 }256 else257 {258 $dir = 'rtl';259 260 //rewrite url in translated page261 update_translated_page($start, $end, $dir);262 263 }264 logger(__METHOD__ . " Changed page direction to rtl");248 global $lang, $rtl_languages; 249 250 if(!(in_array ($lang, $rtl_languages))) 251 { 252 return; 253 } 254 255 $dir = get_attribute($start, $end, 'dir'); 256 257 if($dir == NULL) 258 { 259 260 //attribute does not exist - add it 261 update_translated_page($end, -1, 'dir="rtl"'); 262 } 263 else 264 { 265 $dir = 'rtl'; 266 267 //rewrite url in translated page 268 update_translated_page($start, $end, $dir); 269 270 } 271 logger(__METHOD__ . " Changed page direction to rtl"); 265 272 } 266 273 … … 272 279 function process_tag_termination(&$element) 273 280 { 274 global $pos, $tags_list, $page;275 276 logger(__METHOD__ . " $element ". $page[$pos], 4);277 } 278 279 280 /* 281 * Return the element id within the current tag. 281 global $pos, $tags_list, $page; 282 283 logger(__METHOD__ . " $element ". $page[$pos], 4); 284 } 285 286 287 /* 288 * Return the element id within the current tag. 282 289 */ 283 290 function get_element() 284 291 { 285 logger("Enter " . __METHOD__, 5); 286 global $page, $pos; 287 288 skip_white_space(); 289 290 $start = $pos; 291 292 //keep scanning till the first white space or the '>' mark 293 while($pos < strlen($page) && $page[$pos] != ' '&& 294 $page[$pos] != '>' && $page[$pos] != '\t') 295 { 296 $pos++; 297 } 298 299 logger("Exit " . __METHOD__, 5); 300 return substr($page,$start, $pos - $start); 292 global $page, $pos; 293 logger("Enter " . __METHOD__. ": $pos", 5); 294 295 skip_white_space(); 296 297 $start = $pos; 298 299 //keep scanning till the first white space or the '>' mark 300 // TODO - for CDATA, check '[' 301 while($pos < strlen($page) && $page[$pos] != ' ' && $page[$pos] != '[' && 302 $page[$pos] != '>' && $page[$pos] != '\t') 303 { 304 $pos++; 305 } 306 307 logger("Exit " . __METHOD__. ": $pos", 5); 308 return substr($page,$start, $pos - $start); 301 309 } 302 310 … … 306 314 * Returns the string containing the attribute if available otherwise NULL. 307 315 * In addition the start and end position are moved to boundaries of the 308 * attribute's value. 316 * attribute's value. 309 317 */ 310 318 function get_attribute(&$start, &$end, $id) 311 319 { 312 global $page;313 314 //look for the id within the given limits.315 while($start < $end)316 {317 $index = 0;318 319 while($start < $end && $page[$start + $index] == $id[$index]320 && $index < strlen($id))321 {322 $index++;323 }324 325 if($index == strlen($id))326 {327 //we have match328 break;329 }330 331 $start++;332 }333 334 if($start == $end)335 {336 return NULL;337 }338 339 //look for the " or ' marking start of attribute's value340 while($start < $end && $page[$start] != '"' && $page[$start] != "'")341 {342 $start++;343 }344 345 $start++;346 if($start >= $end)347 {348 return NULL;349 }350 351 $tmp = $start + 1;352 //look for the " or ' marking the end of attribute's value353 while($tmp < $end && $page[$tmp] != '"' && $page[$tmp] != "'")354 {355 $tmp++;356 }357 358 $end = $tmp - 1;359 360 361 return substr($page, $start, $end - $start + 1);320 global $page; 321 322 //look for the id within the given limits. 323 while($start < $end) 324 { 325 $index = 0; 326 327 while($start < $end && $page[$start + $index] == $id[$index] 328 && $index < strlen($id)) 329 { 330 $index++; 331 } 332 333 if($index == strlen($id)) 334 { 335 //we have match 336 break; 337 } 338 339 $start++; 340 } 341 342 if($start == $end) 343 { 344 return NULL; 345 } 346 347 //look for the " or ' marking start of attribute's value 348 while($start < $end && $page[$start] != '"' && $page[$start] != "'") 349 { 350 $start++; 351 } 352 353 $start++; 354 if($start >= $end) 355 { 356 return NULL; 357 } 358 359 $tmp = $start + 1; 360 //look for the " or ' marking the end of attribute's value 361 while($tmp < $end && $page[$tmp] != '"' && $page[$tmp] != "'") 362 { 363 $tmp++; 364 } 365 366 $end = $tmp - 1; 367 368 369 return substr($page, $start, $end - $start + 1); 362 370 } 363 371 … … 369 377 function process_current_tag() 370 378 { 371 global $page, $pos, $tags_list, $is_in_body;372 373 $current_tag = end($tags_list);374 375 logger("Enter " . __METHOD__ ." : $current_tag", 4);376 377 //translate only elements within the body or title378 if($is_in_body || $current_tag == 'title')379 {380 skip_white_space();381 $start = $pos;382 $page_length = strlen($page);383 384 while($pos < $page_length && $page[$pos] != '<')385 {386 //will break translation unit when one of the following characters is reached: ., 387 if(is_sentence_breaker($pos))388 {389 translate_text($start);390 $pos++;391 $start = $pos;392 }393 else if(($end_of_entity = is_html_entity($pos)))394 {395 translate_text($start);396 $pos++;397 $start = $end_of_entity;398 }399 else400 {401 $pos++;402 }403 }404 405 if($pos > $start)406 {407 translate_text($start);408 }409 }410 logger("Exit" . __METHOD__ . " : $current_tag" , 4);379 global $page, $pos, $tags_list, $is_in_body; 380 381 $current_tag = end($tags_list); 382 383 logger("Enter " . __METHOD__ ." : $current_tag", 4); 384 385 //translate only elements within the body or title 386 if($is_in_body || $current_tag == 'title') 387 { 388 skip_white_space(); 389 $start = $pos; 390 $page_length = strlen($page); 391 392 while($pos < $page_length && $page[$pos] != '<') 393 { 394 //will break translation unit when one of the following characters is reached: ., 395 if(is_sentence_breaker($pos)) 396 { 397 translate_text($start); 398 $pos++; 399 $start = $pos; 400 } 401 else if(($end_of_entity = is_html_entity($pos))) 402 { 403 translate_text($start); 404 $pos++; 405 $start = $end_of_entity; 406 } 407 else 408 { 409 $pos++; 410 } 411 } 412 413 if($pos > $start) 414 { 415 translate_text($start); 416 } 417 } 418 logger("Exit" . __METHOD__ . " : $current_tag" , 4); 411 419 } 412 420 … … 419 427 function is_sentence_breaker($position) 420 428 { 421 global $page;422 $rc = FALSE;423 424 if($page[$position] == '.' || $page[$position] == '-')425 {426 //Only break if the next character is a white space,427 //in order to avoid breaks on cases like this: (hello world.)428 if(is_white_space($position + 1) || $page[$position + 1] == '<')429 {430 $rc = TRUE;431 }432 }433 else if($page[$position] == ',' || $page[$position] == '?' ||434 $page[$position] == '(' || $page[$position] == ')' ||435 $page[$position] == '[' || $page[$position] == ']' ||436 $page[$position] == '"' || $page[$position] == '!' ||437 $page[$position] == ':' || $page[$position] == '|')438 {439 //break the sentence into segments regardless of the next character.440 $rc = TRUE;441 }442 443 return $rc;429 global $page; 430 $rc = FALSE; 431 432 if($page[$position] == '.' || $page[$position] == '-') 433 { 434 //Only break if the next character is a white space, 435 //in order to avoid breaks on cases like this: (hello world.) 436 if(is_white_space($position + 1) || $page[$position + 1] == '<') 437 { 438 $rc = TRUE; 439 } 440 } 441 else if($page[$position] == ',' || $page[$position] == '?' || 442 $page[$position] == '(' || $page[$position] == ')' || 443 $page[$position] == '[' || $page[$position] == ']' || 444 $page[$position] == '"' || $page[$position] == '!' || 445 $page[$position] == ':' || $page[$position] == '|') 446 { 447 //break the sentence into segments regardless of the next character. 448 $rc = TRUE; 449 } 450 451 return $rc; 444 452 } 445 453 … … 448 456 * entity. E.g & 449 457 * Return 0 if not an html entity otherwise return the position past this 450 * entity. 458 * entity. 451 459 * 452 460 */ 453 461 function is_html_entity($position) 454 462 { 455 global $page;456 if($page[$position] == "&" )457 {458 $end_pos = $position + 1;459 460 while($page[$end_pos] == "#" ||461 is_digit($end_pos) || is_a_to_z_character($end_pos))462 {463 $end_pos++;464 }465 466 if($page[$end_pos] == ';')467 {468 $entity = substr($page, $position, $end_pos - $position + 1);469 470 //Don't break on ` so for our use we don't consider it an entity471 //e.g. Jack`s apple472 if($entity == "’" || $entity == "'")473 {474 return 0;475 }476 477 //It is an html entity.478 return $end_pos + 1;479 }480 }481 482 return 0;463 global $page; 464 if($page[$position] == "&" ) 465 { 466 $end_pos = $position + 1; 467 468 while($page[$end_pos] == "#" || 469 is_digit($end_pos) || is_a_to_z_character($end_pos)) 470 { 471 $end_pos++; 472 } 473 474 if($page[$end_pos] == ';') 475 { 476 $entity = substr($page, $position, $end_pos - $position + 1); 477 478 //Don't break on ` so for our use we don't consider it an entity 479 //e.g. Jack`s apple 480 if($entity == "’" || $entity == "'") 481 { 482 return 0; 483 } 484 485 //It is an html entity. 486 return $end_pos + 1; 487 } 488 } 489 490 return 0; 483 491 } 484 492 … … 493 501 function is_a_to_z_character($position) 494 502 { 495 global $page;496 497 if(($page[$position] >= 'a' && $page[$position] <= 'z') ||498 ($page[$position] >= 'A' && $page[$position] <= 'Z'))499 {500 return TRUE;501 }502 503 return FALSE;503 global $page; 504 505 if(($page[$position] >= 'a' && $page[$position] <= 'z') || 506 ($page[$position] >= 'A' && $page[$position] <= 'Z')) 507 { 508 return TRUE; 509 } 510 511 return FALSE; 504 512 } 505 513 … … 510 518 function is_digit($position) 511 519 { 512 global $page;513 514 if($page[$position] >= '0' && $page[$position] <= '9')515 {516 return TRUE;517 }518 519 return FALSE;520 } 521 522 /* 523 * Determine if the current position in buffer is a white space. 520 global $page; 521 522 if($page[$position] >= '0' && $page[$position] <= '9') 523 { 524 return TRUE; 525 } 526 527 return FALSE; 528 } 529 530 /* 531 * Determine if the current position in buffer is a white space. 524 532 * return TRUE if current position marks a white space otherwise FALSE. 525 */ 533 */ 526 534 function is_white_space($position) 527 535 { 528 global $page;529 530 if($page[$position] == " " || $page[$position] == "" ||531 $page[$position] == "\t" || $page[$position] == "\r" ||532 $page[$position] == "\n" || $page[$position] == "\x0B" ||533 $page[$position] == "\0")534 {535 return TRUE;536 }536 global $page; 537 538 if($page[$position] == " " || $page[$position] == "" || 539 $page[$position] == "\t" || $page[$position] == "\r" || 540 $page[$position] == "\n" || $page[$position] == "\x0B" || 541 $page[$position] == "\0") 542 { 543 return TRUE; 544 } 537 545 } 538 546 … … 540 548 * Skip within buffer past unreadable characters , i.e. white space 541 549 * and characters considred to be a sentence breaker. Staring from the specified 542 * position going either forward or backward. 543 * param forward - indicate direction going either backward of forward. 550 * position going either forward or backward. 551 * param forward - indicate direction going either backward of forward. 544 552 */ 545 553 function skip_unreadable_chars(&$index, $forward=TRUE) 546 554 { 547 global $page, $pos;548 549 if(!isset($index))550 {551 //use $pos as the default position if not specified otherwise552 $index = &$pos;553 }554 $start = $index;555 556 while($index < strlen($page) && $index > 0 &&557 (is_white_space($index) || is_sentence_breaker($index)))558 {559 ($forward ? $index++ : $index--);560 }561 562 return $index;555 global $page, $pos; 556 557 if(!isset($index)) 558 { 559 //use $pos as the default position if not specified otherwise 560 $index = &$pos; 561 } 562 $start = $index; 563 564 while($index < strlen($page) && $index > 0 && 565 (is_white_space($index) || is_sentence_breaker($index))) 566 { 567 ($forward ? $index++ : $index--); 568 } 569 570 return $index; 563 571 } 564 572 565 573 /* 566 574 * Skip within buffer past white space characters , Staring from the specified 567 * position going either forward or backward. 568 * param forward - indicate direction going either backward of forward. 575 * position going either forward or backward. 576 * param forward - indicate direction going either backward of forward. 569 577 */ 570 578 function skip_white_space(&$index, $forward=TRUE) 571 579 { 572 global $page, $pos;573 574 if(!isset($index))575 {576 //use $pos as the default position if not specified otherwise577 $index = &$pos;578 }579 580 while($index < strlen($page) && $index > 0 && is_white_space($index))581 {582 ($forward ? $index++ : $index--);583 }584 585 return $index;580 global $page, $pos; 581 582 if(!isset($index)) 583 { 584 //use $pos as the default position if not specified otherwise 585 $index = &$pos; 586 } 587 588 while($index < strlen($page) && $index > 0 && is_white_space($index)) 589 { 590 ($forward ? $index++ : $index--); 591 } 592 593 return $index; 586 594 } 587 595 588 596 /** 589 597 * Translate the text between the given start position and the current 590 * position (pos) within the buffer. 598 * position (pos) within the buffer. 591 599 */ 592 600 function translate_text($start) 593 601 { 594 logger("Enter " . __METHOD__ . " : $start", 4);595 global $page, $pos, $is_edit_mode;596 597 //trim white space from the start position going forward598 skip_white_space($start);599 600 //Set the end position of the string to one back from current position601 //(i.e. current position points to '<' or a breaker '.') and then trim602 //white space from the right backwards603 $end = $pos - 1;604 $end = skip_white_space($end, $forward=FALSE);605 606 if($start >= $end)607 {608 //empty string - nothing to do609 return;610 }611 612 $original_text = substr($page, $start, $end - $start + 1);613 614 //Cleanup and prepare text615 $original_text = scrub_text($original_text);616 if($original_text == NULL)617 {618 //nothing left from the text619 return;620 }621 622 $translated_text = fetch_translation($original_text);623 624 insert_translation($original_text, $translated_text, $start, $end);602 logger("Enter " . __METHOD__ . " : $start", 4); 603 global $page, $pos, $is_edit_mode; 604 605 //trim white space from the start position going forward 606 skip_white_space($start); 607 608 //Set the end position of the string to one back from current position 609 //(i.e. current position points to '<' or a breaker '.') and then trim 610 //white space from the right backwards 611 $end = $pos - 1; 612 $end = skip_white_space($end, $forward=FALSE); 613 614 if($start >= $end) 615 { 616 //empty string - nothing to do 617 return; 618 } 619 620 $original_text = substr($page, $start, $end - $start + 1); 621 622 //Cleanup and prepare text 623 $original_text = scrub_text($original_text); 624 if($original_text == NULL) 625 { 626 //nothing left from the text 627 return; 628 } 629 630 $translated_text = fetch_translation($original_text); 631 632 insert_translation($original_text, $translated_text, $start, $end); 625 633 } 626 634 … … 634 642 function insert_translation(&$original_text, &$translated_text, $start, $end) 635 643 { 636 global $segment_id, $is_edit_mode, $tags_list;637 638 $is_translated = FALSE;639 640 if(!$is_edit_mode || !in_array('body', $tags_list))641 {642 if($translated_text != NULL)643 {644 update_translated_page($start, $end, $translated_text);645 }646 }647 else648 {649 $span = "<span id=\"" . SPAN_PREFIX . "$segment_id\">";650 651 if($translated_text == NULL)652 {653 $span .= $original_text . '</span>';654 }655 else656 {657 $span .= $translated_text . "</span>";658 $is_translated = TRUE;659 }660 661 //Insert text (either original or translated) marked by a <span>662 update_translated_page($start, $end, $span);663 664 665 //Insert image to allow editing this segment666 $img = get_img_tag($original_text, $translated_text, $segment_id, $is_translated);667 update_translated_page($end + 1, - 1, $img);668 669 //Increment only after both text and image are generated so they670 //will be the same for each translated segement671 $segment_id++;672 673 }674 675 logger("Exit " . __METHOD__ . " : $original_text" , 4);644 global $segment_id, $is_edit_mode, $tags_list; 645 646 $is_translated = FALSE; 647 648 if(!$is_edit_mode || !in_array('body', $tags_list)) 649 { 650 if($translated_text != NULL) 651 { 652 update_translated_page($start, $end, $translated_text); 653 } 654 } 655 else 656 { 657 $span = "<span id=\"" . SPAN_PREFIX . "$segment_id\">"; 658 659 if($translated_text == NULL) 660 { 661 $span .= $original_text . '</span>'; 662 } 663 else 664 { 665 $span .= $translated_text . "</span>"; 666 $is_translated = TRUE; 667 } 668 669 //Insert text (either original or translated) marked by a <span> 670 update_translated_page($start, $end, $span); 671 672 673 //Insert image to allow editing this segment 674 $img = get_img_tag($original_text, $translated_text, $segment_id, $is_translated); 675 update_translated_page($end + 1, - 1, $img); 676 677 //Increment only after both text and image are generated so they 678 //will be the same for each translated segement 679 $segment_id++; 680 681 } 682 683 logger("Exit " . __METHOD__ . " : $original_text" , 4); 676 684 } 677 685 … … 684 692 function scrub_text(&$text) 685 693 { 686 //skip strings like without any readable characters (i.e. ".")687 //Todo: need a broader defintion for non-ascii characters as well688 if(preg_match("/^[.?!|\(\)\[\],0-9]+$/", $text))689 {690 return NULL;691 }692 693 //replace multi space chars with a single space694 $text = preg_replace("/\s\s+/", " ", $text);695 696 return $text;694 //skip strings like without any readable characters (i.e. ".") 695 //Todo: need a broader defintion for non-ascii characters as well 696 if(preg_match("/^[.?!|\(\)\[\],0-9]+$/", $text)) 697 { 698 return NULL; 699 } 700 701 //replace multi space chars with a single space 702 $text = preg_replace("/\s\s+/", " ", $text); 703 704 return $text; 697 705 } 698 706 … … 704 712 * param start - marks the starting position of the replaced string in the original page. 705 713 * param end - marks the end position of the replaced string in the original page. 706 Use -1 to do insert instead of replace.714 Use -1 to do insert instead of replace. 707 715 * param translated_text - text to be inserted. 708 716 */ 709 717 function update_translated_page($start, $end, $translated_text) 710 718 { 711 global $page, $tr_page, $tr_mark;712 713 //Bring the translated up to date up to the start position.714 while($tr_mark < $start)715 {716 $tr_page .= $page[$tr_mark++];717 }718 719 $tr_page .= $translated_text;720 721 if($end > $start)722 {723 //Move mark to correlate the posistion between the two pages.724 //Only do this when some content has been replaced, i.e. not725 //an insert.726 $tr_mark = $end + 1;727 }728 719 global $page, $tr_page, $tr_mark; 720 721 //Bring the translated up to date up to the start position. 722 while($tr_mark < $start) 723 { 724 $tr_page .= $page[$tr_mark++]; 725 } 726 727 $tr_page .= $translated_text; 728 729 if($end > $start) 730 { 731 //Move mark to correlate the posistion between the two pages. 732 //Only do this when some content has been replaced, i.e. not 733 //an insert. 734 $tr_mark = $end + 1; 735 } 736 729 737 } 730 738
Note: See TracChangeset
for help on using the changeset viewer.
