LSlang: improve internal message extractors for PHP files

- add support for multi-line messages
- detect and ignore commented-out code blocks
- improve code tracing

Note: advanced gettext functions (ngettext, dcgettext, ...) are still
not supported.
This commit is contained in:
Benjamin Renard 2023-05-26 11:01:03 +02:00
parent 65d1d50c3a
commit 1c900320db
Signed by: bn8
GPG key ID: 3E2E1CE1907115BC

View file

@ -545,59 +545,185 @@ function _cli_find_and_parse_template_file($dir) {
function _cli_parse_php_file($file) {
global $LSlang_cli_logger;
$LSlang_cli_logger -> debug("Looking for string to translate in '$file' PHP file");
$count = 0;
$quote='';
$res='';
foreach(file($file) as $line) {
$count++;
$LSlang_cli_logger -> trace("Handle line #$count of '$file' PHP file");
$offset=0;
while ($pos = strpos($line,'__(',$offset)) {
$LSlang_cli_logger -> trace("$file:$count: detect keyword at position #$pos ('$line')");
for ($i=$pos+3;$i<strlen($line);$i++) {
$offset=$i; // Always increase offset to avoid infinity-loop
if (empty($quote)) {
$quote = '';
$res = '';
$comment = null;
$line_break_continue = false;
$inside_keyword = false;
$inside_comment = false;
$end_comment_pos = false;
$keyword = null;
foreach(file($file) as $line_count => $line) {
$line = preg_replace("/\n$/", "", $line);
$line_count++;
$LSlang_cli_logger -> trace("Handle line #$line_count of '$file' PHP file : '$line'");
$offset = 0;
while (
$line_break_continue
|| (!$inside_comment && preg_match('#^([ \t])*(//|/\*)#', substr($line, $offset), $comment))
|| ($inside_comment && $end_comment_pos = strpos(substr($line, $offset), '*/'))
|| preg_match('/[^a-zA-Z0-9\'\"]_{1,3}\(/', substr($line, $offset), $keyword, PREG_OFFSET_CAPTURE)
) {
if ($line_break_continue) {
$LSlang_cli_logger -> trace("$file:$line_count: continue from previous line after a line break");
$line_break_continue = false;
$offset = 0;
$inside_keyword = true;
}
elseif ($comment) {
$pos = $offset + strlen($comment[1]);
if ($comment[2] == '//') {
$LSlang_cli_logger -> trace(
"$file:$line_count: entering comment detected '".$comment[2]."' at position #$pos, ".
"ignore the line ('$line', offset=$offset)");
break;
}
$LSlang_cli_logger -> trace(
"$file:$line_count: entering comment detected '".$comment[2]."' at position #$pos ".
"('$line', offset=$offset)");
$inside_comment = true;
$offset = $pos + strlen($comment[2]);
$comment = null;
}
elseif ($inside_comment) {
if ($end_comment_pos) {
$pos = $offset + $end_comment_pos;
$LSlang_cli_logger -> trace(
"$file:$line_count: end of comment detected at position #$pos, continue ".
"('$line', offset=$offset)");
$offset = $pos;
$end_comment_pos = null;
$inside_comment = false;
continue;
}
if ($keyword) {
$pos = $offset + $keyword[0][1];
$LSlang_cli_logger -> trace(
"$file:$line_count: detect keyword '".substr($keyword[0][0], 1).")' at position #$pos,".
" but inside comment => ignore it ('$line', offset=$offset)");
$offset += strlen($keyword[0][0]) + $keyword[0][1];
}
else {
$LSlang_cli_logger -> fatal(
"$file:$line_count: not supported case (inside comment & !keyword), stop");
return;
}
}
else if ($keyword) {
$pos = $offset + $keyword[0][1];
$LSlang_cli_logger -> trace(
"$file:$line_count: detect keyword '".substr($keyword[0][0], 1).")' at position #$pos ".
"('$line', offset=$offset)");
$offset += strlen($keyword[0][0]) + $keyword[0][1];
$closed = false;
$inside_keyword = true;
}
else {
$LSlang_cli_logger -> fatal(
"$file:$line_count: not supported case, stop");
return;
}
$ignore = false;
$concatenation_need = false;
for ($i=$offset; $i<strlen($line); $i++) {
$offset = $i; // Always increase offset to avoid infinity-loop
if ($inside_comment) {
if (substr($line, $i, 2) == '*/') {
$LSlang_cli_logger -> trace(
"$file:$line_count: End of comment detected at position #$i ('$line')");
$inside_comment = false;
}
$i++;
}
elseif (empty($quote)) {
// Quote char not detected : try to detect it
if ($line[$i]=='\\' || $line[$i]==" " || $line[$i]=="\t") {
// Space or escape char : pass
$i++;
}
elseif ($line[$i]=='"' || $line[$i]=="'") {
elseif (in_array($line[$i], array('"', "'")) && !$concatenation_need) {
// Quote detected
$LSlang_cli_logger -> trace(
"$file:$line_count: Quote (".$line[$i].") detected at position #$i ('$line')");
$quote=$line[$i];
}
elseif ($line[$i]=='$' || $line[$i]==')') {
elseif ($line[$i]=='$') {
// Variable translation not possible or end function call detected
$LSlang_cli_logger -> trace(
"$file:$line_count: Variable translation detected at position #$i, ignore it ".
"('$line')");
$ignore = true;
break;
}
elseif ($line[$i]==')' && $inside_keyword) {
// End function call detected
$LSlang_cli_logger -> trace(
"$file:$line_count: End function call detected at position #$i ('$line')");
$inside_keyword = false;
break;
}
elseif ($line[$i] == '.' && $concatenation_need) {
// Concatenation char detected
$LSlang_cli_logger -> trace(
"$file:$line_count: Concatenation char detected at position #$i ('$line')");
$concatenation_need = false;
}
elseif (preg_match('/[A-Za-z]/', $line[$i])) {
// Constant or function call detected
$LSlang_cli_logger -> trace(
"$file:$line_count: Constant or function call detected at position #$i, ignore it ('$line')");
$ignore = true;
break;
}
else {
// Unknown case : continue
$LSlang_cli_logger -> trace(
"$file:$line_count: Unknown case before quote at position #$i ('".$line[$i]."'), ".
"ignore it and continue ('$line')");
$i++;
}
}
elseif ($quote) {
// Quote char already detected : try to detect end quote char
if ($line[$i]=='\\') {
// Escape char detected : pass this char and the following one
$res.=$line[$i];
if ($line[$i] == '\\') {
// Escape char detected : keep it and the following one
$LSlang_cli_logger -> trace(
"$file:$line_count: Escape char detected inside quote at position #$i, keep it and ".
"the following one ('$line')");
$res .= $line[$i];
$i++;
$res.=$line[$i];
$res .= $line[$i];
}
elseif ($line[$i]==$quote) {
// End quote char detected : reset quote char detection and break detection
$quote='';
break;
elseif ($line[$i] == $quote) {
// End quote char detected : reset quote and set concatenation as need
$LSlang_cli_logger -> trace(
"$file:$line_count: End quote char detected at position #$i, reset quote and set ".
"concatenation as need ('$line')");
$quote = '';
$concatenation_need = true;
}
else {
// End quote char not detected : append current char to result
$res.=$line[$i];
$res .= $line[$i];
}
}
}
// Include detected string if not empty and quote char was detected and reseted
if (!empty($res) && empty($quote)) {
_cli_add_str_to_translate($res, _cli_absolute2relative_path($file).":$count");
$res='';
if ($inside_comment) {
$LSlang_cli_logger -> trace(
"$file:$line_count: line break detected inside comment ('$line')");
}
elseif (!empty($res) && empty($quote) && !$inside_keyword) {
_cli_add_str_to_translate($res, _cli_absolute2relative_path($file).":$line_count");
$res = '';
}
else if (!$ignore) {
$LSlang_cli_logger -> trace(
"$file:$line_count: line break detected inside keyword, continue on next line ('$line')");
$line_break_continue = true;
if ($quote)
$res .= "\n";
break;
}
}
}
@ -609,12 +735,24 @@ function _cli_parse_php_file($file) {
* @param string $dir The directory path
* @return void
*/
function _cli_find_and_parse_php_file($dir, $filename_regex) {
function _cli_find_and_parse_php_file($dir, $filename_regex=null, $recursive=false, $ignore_dirs=null) {
global $LSlang_cli_logger;
$filename_regex = $filename_regex?$filename_regex:'/^(.+)\.php$/';
if (is_dir($dir)) {
$LSlang_cli_logger -> debug("Looking for string to translate in PHP files of directory '$dir'");
if ($dh = opendir($dir)) {
while (($file = readdir($dh)) !== false) {
if (preg_match($filename_regex, $file)) {
_cli_parse_php_file($dir.'/'.$file);
if ($file == '.' || $file == '..') continue;
if (is_link("$dir/$file")) continue;
if (
is_dir("$dir/$file")
&& $recursive
&& (!is_array($ignore_dirs) || !in_array("$dir/$file", $ignore_dirs))
) {
_cli_find_and_parse_php_file("$dir/$file", $filename_regex, true, $ignore_dirs);
}
elseif (is_file($dir."/".$file) && preg_match($filename_regex, $file)) {
_cli_parse_php_file("$dir/$file");
}
}
closedir($dh);
@ -1018,8 +1156,8 @@ function cli_generate_lang_file($command_args) {
// Manage includes files
if (!in_array('includes', $withouts) && (!$only || $only == 'includes')) {
// Note: Upstream code most only use gettext translation, do not handle it here
if ($include_upstream) _cli_find_and_parse_php_file(LS_ROOT_DIR.'/'.LS_INCLUDE_DIR, '/^(.+)\.php$/');
_cli_find_and_parse_php_file(LS_ROOT_DIR.'/'.LS_LOCAL_DIR.LS_INCLUDE_DIR, '/^(.+)\.php$/');
if ($include_upstream) _cli_find_and_parse_php_file(LS_ROOT_DIR.'/'.LS_INCLUDE_DIR);
_cli_find_and_parse_php_file(LS_ROOT_DIR.'/'.LS_LOCAL_DIR.LS_INCLUDE_DIR);
if ($include_upstream) _cli_find_and_parse_php_file(LS_ROOT_DIR.'/'.LS_CLASS_DIR, '/^class\.(.+)\.php$/');
_cli_find_and_parse_php_file(LS_ROOT_DIR.'/'.LS_LOCAL_DIR.LS_CLASS_DIR, '/^class\.(.+)\.php$/');
}