adding php pspell support
authortokul <tokul@7612ce4b-ef26-0410-bec9-ea0150e637f0>
Sat, 30 Sep 2006 17:35:20 +0000 (17:35 +0000)
committertokul <tokul@7612ce4b-ef26-0410-bec9-ea0150e637f0>
Sat, 30 Sep 2006 17:35:20 +0000 (17:35 +0000)
git-svn-id: https://svn.code.sf.net/p/squirrelmail/code/trunk/squirrelmail@11789 7612ce4b-ef26-0410-bec9-ea0150e637f0

plugins/squirrelspell/class/cmd_spell.php [new file with mode: 0644]
plugins/squirrelspell/class/common.php [new file with mode: 0644]
plugins/squirrelspell/class/php_pspell.php [new file with mode: 0644]

diff --git a/plugins/squirrelspell/class/cmd_spell.php b/plugins/squirrelspell/class/cmd_spell.php
new file mode 100644 (file)
index 0000000..dddc698
--- /dev/null
@@ -0,0 +1,316 @@
+<?php
+/**
+ * Command line spellcheck class
+ *
+ * ---
+ *  Quick brownn fox brownn
+ *
+ *  brownn squirrel.
+ *  twentytwo owttnewt
+ * ---
+ * @(#) International Ispell Version 3.1.20 10/10/95, patch 1
+ * *
+ * & brownn 5 7: brown, Browne, browns, brown n, brown-n
+ * *
+ * & brownn 5 18: brown, Browne, browns, brown n, brown-n
+ * 
+ * 
+ * & brownn 5 1: brown, Browne, browns, brown n, brown-n
+ * *
+ * 
+ * & twentytwo 2 1: twenty two, twenty-two
+ * # owttnewt 11
+ * 
+ * 
+ * ---
+ * $params = array();
+ * $params['spell_command'] = 'ispell -d american -a';
+ * $params['use_proc_open'] = false; // (check_php_version(4,3))
+ * $params['temp_dir'] = '/tmp/'; // $attachment_dir
+ * $params['userdic'] = array(); // user's dictionary
+ * $params['debug'] = true;
+ * 
+ * $spell = new cmd_spell($params);
+ * // check $spell->error buffer
+ * 
+ * $text = "Quick brownn fox brownn\n\nbrownn squirrel.\ntwentytwo owttnewt";
+ * 
+ * $results = $spell->check_text($text);
+ * // check $spell->error buffer
+ * // parse $results
+ *
+ * @copyright &copy; 1999-2006 The SquirrelMail Project Team
+ * @license http://opensource.org/licenses/gpl-license.php GNU Public License
+ * @version $Id$
+ * @package plugins
+ * @subpackage squirrelspell
+ */
+
+/**
+ * Command line spellcheck class, compatible with ispell and aspell.
+ * @package plugins
+ * @subpackage squirrelspell
+ */
+class cmd_spell extends squirrelspell {
+    /**
+     * Shell command that starts the spellchecker in ispell pipe mode,
+     * for example 'ispell -d american -a'.
+     * @var string
+     */
+    var $spell_command = '';
+    /**
+     * Words from the user's dictionary. Words listed here are never
+     * reported as misspelled.
+     * @var array
+     */
+    var $userdic = array();
+    /**
+     * Controls which function is used to execute ispell. proc_open()
+     * should be used in PHP 4.3+. exec() can be used in older PHP versions.
+     * @var boolean
+     */
+    var $use_proc_open = false;
+    /**
+     * Directory used by exec_spell() for its temporary file. Stored with
+     * a trailing slash.
+     * @var string
+     */
+    var $temp_dir = '';
+    /**
+     * When true, PHP errors are displayed and proc_open()/fopen() warnings
+     * are not suppressed.
+     * @var boolean
+     */
+    var $debug = false;
+    /**
+     * Result of the last check_text() call, keyed by misspelled word.
+     * Each entry has 'suggestions' and 'locations' ("line:offset") arrays.
+     * @var array
+     */
+    var $missed_words = array();
+
+    /**
+     * Constructor function
+     *
+     * Recognized $aParams keys: 'spell_command' (required), 'userdic',
+     * 'use_proc_open', 'temp_dir' (required when proc_open is not used)
+     * and 'debug'. Problems are reported through the error buffer
+     * ($this->error); processing of the parameters stops at the first one.
+     * @param array $aParams
+     */
+    function __construct($aParams=array()) {
+        if (! isset($aParams['spell_command'])) {
+            $this->set_error('Spellcheck command is not set.');
+            return;
+        }
+        $this->spell_command = $aParams['spell_command'];
+
+        if (isset($aParams['userdic'])) {
+            $this->userdic = $aParams['userdic'];
+        }
+
+        if (isset($aParams['use_proc_open'])) {
+            $this->use_proc_open = (bool) $aParams['use_proc_open'];
+        }
+
+        if (isset($aParams['temp_dir'])) {
+            $this->temp_dir = $aParams['temp_dir'];
+            // add slash to temporary directory, if it does not end with slash.
+            if (substr($this->temp_dir, -1) != '/') {
+                $this->temp_dir = $this->temp_dir . '/';
+            }
+        } elseif (!$this->use_proc_open) {
+            // exec_spell() can't work without a place for its temporary file
+            $this->set_error('Temporary directory is not set.');
+            return;
+        }
+
+        if (isset($aParams['debug']) && (bool) $aParams['debug']) {
+            $this->debug = true;
+            error_reporting(E_ALL);
+            ini_set('display_errors',1);
+        }
+    }
+
+    /**
+     * PHP 4 style constructor.
+     *
+     * Kept so that PHP 4 (which does not know __construct()) and any code
+     * calling cmd_spell() explicitly keep working. Declared after
+     * __construct() on purpose: PHP 5 raises an E_STRICT notice when the
+     * declarations are in the opposite order.
+     * @param array $aParams
+     */
+    function cmd_spell($aParams=array()) {
+        $this->__construct($aParams);
+    }
+
+    /**
+     * Runs the spellchecker with proc_open() and feeds the text through
+     * the command's standard input.
+     *
+     * Output lines of stdout are returned first, followed by the lines of
+     * stderr. HTML tags are stripped from every line.
+     *
+     * NOTE(review): the whole text is written before any output is read and
+     * stdout is drained before stderr; very large inputs or a very chatty
+     * stderr could block on a full pipe - confirm this is acceptable.
+     * @param string $sText
+     * @return mixed array with command output or false.
+     */
+    function proc_open_spell($sText) {
+        $descriptorspec = array(
+             0 => array('pipe', 'r'),  // stdin is a pipe that the child will read from
+             1 => array('pipe', 'w'),  // stdout is a pipe that the child will write to
+             2 => array('pipe', 'w'),  // stderr is a pipe that the child will write to
+             );
+
+        // warnings are only shown in debug mode
+        if ($this->debug) {
+            $spell_proc = proc_open($this->spell_command, $descriptorspec, $pipes);
+        } else {
+            $spell_proc = @proc_open($this->spell_command, $descriptorspec, $pipes);
+        }
+
+        if ( ! is_resource($spell_proc) ) {
+            return $this->set_error(sprintf(_("Could not run the spellchecker command (%s)."),
+                                            $this->spell_command));
+        }
+
+        if ( ! @fwrite($pipes[0],$sText) ) {
+            $this->set_error(_("Error while writing to pipe."));
+            // close all three $pipes here.
+            for($i=0; $i<=2; $i++) {
+                // disable all fclose error messages
+                @fclose($pipes[$i]);
+            }
+            // release the process handle too, otherwise it is leaked
+            @proc_close($spell_proc);
+            return false;
+        }
+
+        // closing stdin tells the spellchecker that the input is complete
+        fclose($pipes[0]);
+
+        $sqspell_output = array();
+        for($i=1; $i<=2; $i++) {
+            // fgets() returns false at end of file, so no bogus empty entry
+            // is appended after the last line. strip_tags() replaces
+            // fgetss(), which is not available in PHP 8.
+            while (($line = fgets($pipes[$i])) !== false) {
+                $sqspell_output[] = rtrim(strip_tags($line), "\r\n");
+            }
+            fclose($pipes[$i]);
+        }
+
+        // non-zero exit status: treat collected output as the error message
+        if (proc_close($spell_proc)) {
+            $error = '';
+            foreach ($sqspell_output as $line) {
+                $error.= $line . "\n";
+            }
+            return $this->set_error($error);
+        }
+        return $sqspell_output;
+    }
+
+    /**
+     * Runs the spellchecker with exec(). The text is stored in a temporary
+     * file inside $this->temp_dir and redirected to the command's standard
+     * input. stderr is merged into the returned output.
+     * @param string $sText
+     * @return mixed array with command output or false.
+     */
+    function exec_spell($sText) {
+        // find unused file in temporary directory
+        do {
+            $floc = $this->temp_dir . md5($sText . microtime());
+        } while (file_exists($floc));
+
+        // warnings are only shown in debug mode
+        if ($this->debug) {
+            $fp = fopen($floc, 'w');
+        } else {
+            $fp = @fopen($floc, 'w');
+        }
+        if ( ! is_resource($fp) ) {
+            return $this->set_error(sprintf(_("Could not open temporary file '%s'."),
+                                     $floc) );
+        }
+
+        if ( ! @fwrite($fp, $sText) ) {
+            $this->set_error(sprintf(_("Error while writing to temporary file '%s'."),
+                                     $floc) );
+            // close file descriptor and don't leave the unusable file behind
+            fclose($fp);
+            @unlink($floc);
+            return false;
+        }
+        fclose($fp);
+
+        // the file name is escaped, because temp_dir may contain spaces or
+        // other characters that are special to the shell
+        $sqspell_output = array();
+        $exitcode = 0;
+        exec($this->spell_command . ' < ' . escapeshellarg($floc) . ' 2>&1',
+             $sqspell_output, $exitcode);
+
+        unlink($floc);
+
+        // non-zero exit status: treat collected output as the error message
+        if ($exitcode) {
+            $error = '';
+            foreach ($sqspell_output as $line) {
+                $error.= $line . "\n";
+            }
+            return $this->set_error($error);
+        }
+        return $sqspell_output;
+    }
+
+    /**
+     * Prepares string for ispell/aspell parsing
+     *
+     * Function adds an extra space at the beginning of each line. This way
+     * ispell/aspell don't treat these as command characters. CRLF line
+     * endings are converted to LF and every line is terminated with LF.
+     * @param string $sText
+     * @return string
+     */
+    function prepare_text($sText) {
+        $sText = str_replace("\r\n","\n",$sText);
+        $ret = '';
+        foreach (explode("\n",$sText) as $line) {
+            $ret.= ' ' . $line . "\n";
+        }
+        return $ret;
+    }
+
+    /**
+     * Checks block of text
+     *
+     * Returned array is keyed by misspelled word. Every entry contains
+     * 'suggestions' (array of strings, may be empty) and 'locations'
+     * (array of "line:offset" strings, both zero based). Words listed in
+     * the user's dictionary are skipped. When the spellchecker can't be
+     * executed, an empty array is returned and the reason is stored in the
+     * error buffer.
+     * @param string $sText text
+     * @return array
+     */
+    function check_text($sText) {
+        $this->missed_words = array();
+
+        $sText = $this->prepare_text($sText);
+
+        if ($this->use_proc_open) {
+            $sqspell_output = $this->proc_open_spell($sText);
+        } else {
+            $sqspell_output = $this->exec_spell($sText);
+        }
+
+        // Spellchecker failed. Error buffer is already set by the runner.
+        if (! is_array($sqspell_output)) {
+            return $this->missed_words;
+        }
+
+        /**
+         * Number of the text line that is currently processed.
+         */
+        $current_line = 0;
+        /**
+         * Now we process the output of the spellcheck command (ispell or
+         * aspell in ispell compatibility mode). Lines that start with other
+         * characters ("*" for correct words, "@" for the version banner)
+         * are ignored.
+         */
+        foreach ($sqspell_output as $output_line) {
+            switch (substr($output_line, 0, 1)) {
+            /**
+             * Line is empty.
+             * Ispell adds empty lines when an end of line is reached
+             */
+            case '':
+                $current_line++;
+                break;
+            /**
+             * Line begins with "&".
+             * This means there's a misspelled word and a few suggestions.
+             * Format: "& word count offset: suggestion, suggestion"
+             */
+            case '&':
+                // limit keeps suggestions intact, if they contain ": "
+                $parts = explode(': ', $output_line, 2);
+                $fields = explode(' ', $parts[0]);
+                if (! isset($fields[3])) {
+                    // not a well formed line
+                    break;
+                }
+                $sqspell_word = $fields[1];
+                /**
+                 * Check if the word is in user dictionary.
+                 */
+                if (! in_array($sqspell_word,$this->userdic)) {
+                    // offset is reduced by one, because prepare_text()
+                    // prepends a space to every line
+                    $sqspell_symb = intval($fields[3]) - 1;
+                    // add suggestions only once per word
+                    if (! isset($this->missed_words[$sqspell_word])) {
+                        $this->missed_words[$sqspell_word]['suggestions'] = array();
+                        if (isset($parts[1])) {
+                            foreach (explode(',',$parts[1]) as $word) {
+                                $this->missed_words[$sqspell_word]['suggestions'][] = trim($word);
+                            }
+                        }
+                    }
+                    // add location
+                    $this->missed_words[$sqspell_word]['locations'][] = "$current_line:$sqspell_symb";
+                }
+                break;
+            /**
+             * Line begins with "#".
+             * This means a misspelled word and no suggestions.
+             * Format: "# word offset"
+             */
+            case '#':
+                $fields = explode(' ', $output_line);
+                if (! isset($fields[2])) {
+                    // not a well formed line
+                    break;
+                }
+                $sqspell_word = $fields[1];
+                /**
+                 * Check if the word is in user dictionary.
+                 */
+                if (! in_array($sqspell_word,$this->userdic)) {
+                    $sqspell_symb = intval($fields[2]) - 1;
+                    // no suggestions
+                    $this->missed_words[$sqspell_word]['suggestions'] = array();
+                    // add location
+                    $this->missed_words[$sqspell_word]['locations'][] = "$current_line:$sqspell_symb";
+                }
+                break;
+            }
+        }
+        return $this->missed_words;
+    }
+}
+
+
+/**
+ * Define the command used to spellcheck the document.
+ */
+#$sqspell_command=$SQSPELL_APP[$sqspell_use_app];
diff --git a/plugins/squirrelspell/class/common.php b/plugins/squirrelspell/class/common.php
new file mode 100644 (file)
index 0000000..adbc4b9
--- /dev/null
@@ -0,0 +1,29 @@
+<?php
+/**
+ * Common spellcheck class functions
+ * @copyright &copy; 1999-2006 The SquirrelMail Project Team
+ * @license http://opensource.org/licenses/gpl-license.php GNU Public License
+ * @version $Id$
+ * @package plugins
+ * @subpackage squirrelspell
+ */
+
+/**
+ * @package plugins
+ * @subpackage squirrelspell
+ */
+class squirrelspell {
+    /**
+     * Error buffer. Holds the message passed to the last set_error() call.
+     * @var string
+     */
+    var $error = '';
+
+    /**
+     * Stores error message in the error buffer.
+     *
+     * Always returns false, which allows callers to write
+     * "return $this->set_error(...)".
+     * @param string $sError error message
+     * @return boolean false
+     */
+    function set_error($sError) {
+        $this->error = $sError;
+
+        return false;
+    }
+
+    /**
+     * Placeholder for the spellcheck function. Classes that extend
+     * squirrelspell are expected to replace it; this version only
+     * reports an error.
+     * @param string $sText text (not used)
+     * @return boolean false
+     */
+    function check_text($sText) {
+        $sMessage = 'check_text method is not implemented in this class.';
+
+        return $this->set_error($sMessage);
+    }
+}
\ No newline at end of file
diff --git a/plugins/squirrelspell/class/php_pspell.php b/plugins/squirrelspell/class/php_pspell.php
new file mode 100644 (file)
index 0000000..5f15b3d
--- /dev/null
@@ -0,0 +1,135 @@
+<?php
+/**
+ * PHP pspell spellcheck class functions
+ * @copyright &copy; 2006 The SquirrelMail Project Team
+ * @license http://opensource.org/licenses/gpl-license.php GNU Public License
+ * @version $Id$
+ * @package plugins
+ * @subpackage squirrelspell
+ */
+
+/**
+ * PHP Pspell class
+ * @package plugins
+ * @subpackage squirrelspell
+ */
+class php_pspell extends squirrelspell {
+    /**
+     * Dictionary language, first part of the 'dictionary' parameter.
+     * @var string
+     */
+    var $dict = 'en';
+    /**
+     * Second part of the 'dictionary' parameter. Passed to pspell_new()
+     * as its second argument.
+     * @var string
+     */
+    var $subdict = '';
+    /**
+     * Third part of the 'dictionary' parameter. Passed to pspell_new()
+     * as its third argument.
+     * @var string
+     */
+    var $jargon = '';
+    /**
+     * Character set of checked text.
+     * @var string
+     */
+    var $charset = 'utf-8';
+    /**
+     * Pspell mode. Set to PSPELL_FAST by constructor, if 'mode' parameter
+     * is not used.
+     * @var integer
+     */
+    var $mode = null;
+    /**
+     * Words from the user's dictionary. Words listed here are never
+     * reported as misspelled.
+     * @var array
+     */
+    var $userdic = array();
+
+    /**
+     * Result of the last check_text() call, keyed by misspelled word.
+     * Each entry has 'suggestions' and 'locations' ("line:offset") arrays.
+     * @var array
+     */
+    var $missed_words = array();
+
+    /**
+     * Error buffer
+     * @var string
+     */
+    var $error = '';
+    /**
+     * Dictionary link returned by pspell_new(). null, if constructor has
+     * not opened a dictionary; false, if pspell_new() failed.
+     * @var mixed
+     */
+    var $dictionary_link = null;
+
+    /**
+     * Constructor function
+     *
+     * Recognized $aParams keys: 'dictionary' (comma separated
+     * "dict,subdict,jargon"), 'charset', 'userdic' and 'mode'. Problems
+     * are reported through the error buffer ($this->error).
+     * @param array $aParams
+     */
+    function __construct($aParams=array()) {
+        if (! extension_loaded('pspell')) {
+            $this->set_error('Pspell extension is not available');
+            return;
+        }
+
+        if (isset($aParams['dictionary'])) {
+            $aDict = explode(',',$aParams['dictionary']);
+            if (isset($aDict[0])) $this->dict = trim($aDict[0]);
+            if (isset($aDict[1])) $this->subdict = trim($aDict[1]);
+            if (isset($aDict[2])) $this->jargon = trim($aDict[2]);
+        }
+        if (isset($aParams['charset'])) {
+            $this->charset = $aParams['charset'];
+        }
+        if (isset($aParams['userdic'])) {
+            $this->userdic = $aParams['userdic'];
+        }
+        if (isset($aParams['mode'])) {
+            $this->mode = $aParams['mode'];
+        } else {
+            $this->mode = PSPELL_FAST;
+        }
+        // dict, subdict, jargon, charset, spellcheck_type
+        $this->dictionary_link = pspell_new($this->dict,$this->subdict,$this->jargon,$this->charset,$this->mode);
+        if ($this->dictionary_link === false) {
+            $this->set_error('Could not open pspell dictionary.');
+        }
+    }
+
+    /**
+     * PHP 4 style constructor.
+     *
+     * Kept so that PHP 4 (which does not know __construct()) and any code
+     * calling php_pspell() explicitly keep working. Declared after
+     * __construct() on purpose: PHP 5 raises an E_STRICT notice when the
+     * declarations are in the opposite order.
+     * @param array $aParams
+     */
+    function php_pspell($aParams=array()) {
+        $this->__construct($aParams);
+    }
+
+    // private functions
+
+    /**
+     * Checks one word against the opened dictionary.
+     * @param string $sWord
+     * @return boolean result of pspell_check()
+     */
+    function check_word($sWord) {
+        return pspell_check($this->dictionary_link,$sWord);
+    }
+
+    /**
+     * Asks the opened dictionary for spelling suggestions.
+     * @param string $sWord
+     * @return array result of pspell_suggest()
+     */
+    function suggest($sWord) {
+        return pspell_suggest($this->dictionary_link,$sWord);
+    }
+
+    // public function
+
+    /**
+     * Check block of text
+     *
+     * Text is split into words on whitespace and common punctuation
+     * (. ; , : ! ? ( ) "). Returned array is keyed by misspelled word.
+     * Every entry contains 'suggestions' (array of strings) and
+     * 'locations' (array of "line:offset" strings, both zero based and
+     * counted in characters). Words listed in the user's dictionary are
+     * skipped. When dictionary is not available, an empty array is
+     * returned and the reason is stored in the error buffer.
+     * @param string $sText text
+     * @return array
+     */
+    function check_text($sText) {
+        // resets missed words array
+        $this->missed_words = array();
+
+        // pspell functions can't be called without a dictionary link
+        if ($this->dictionary_link === null || $this->dictionary_link === false) {
+            if ($this->error == '') {
+                $this->set_error('Pspell dictionary is not available.');
+            }
+            return $this->missed_words;
+        }
+
+        $line = 0;      // current line
+        $start = 0;     // offset of the first character of $word
+        $position = 0;  // offset of current character in current line
+        $word = '';
+        // parse text. sq_* functions are used in order to work with characters and not with bytes
+        // length is calculated once; it does not change inside the loop
+        $length = sq_strlen($sText,$this->charset);
+        for ($i = 0; $i <= $length; $i++) {
+            if ($i == $length) {
+                // add space in order to check last $word.
+                $char = ' ';
+            } else {
+                $char = sq_substr($sText,$i,1,$this->charset);
+            }
+
+            switch($char) {
+            // word separators
+            case ' ':
+            case '.':
+            case ';':
+            case ',':
+            case ':':
+            case '!':
+            case '?':
+            case '(':
+            case ')':
+            case '"':
+            case "\t":
+            case "\r":
+            case "\n":
+                // strict comparison is required. empty() treats "0" as an
+                // empty word, which glued it to the next word.
+                if ($word !== '') {
+                    if (! in_array($word,$this->userdic)) {
+                        // known misspellings are not checked twice
+                        if (isset($this->missed_words[$word]) || !$this->check_word($word)) {
+                            if (! isset($this->missed_words[$word]['suggestions'])) {
+                                $this->missed_words[$word]['suggestions'] = $this->suggest($word);
+                            }
+                            $this->missed_words[$word]['locations'][] = "$line:$start";
+                        }
+                    }
+                    $word = '';
+                }
+                if ($char == "\n") {
+                    $position = 0;
+                    $line++;
+                } else {
+                    $position++;
+                }
+                break;
+            default:
+                // a-zA-Z0-9' + 8bit chars (nbspace and other spaces excluded, depends on charset)
+                // add char to word
+                if ($word === '') {
+                    // first character of a new word
+                    $start = $position;
+                }
+                $position++;
+                $word.=$char;
+            }
+        }
+        return $this->missed_words;
+    }
+}