// array('file' => image filename, 'width'/'height' => image dimensions)
    private $smileyImages = array();
    // Cache combined regular expression for all smileys
    private $smileyRegexps = '';
    // Cache all possible characters that a smiley can start with, in one string
    private $smileyStarts = '';
    // Like $smileyStarts, for use in a regex character class
    private $smileyStartsRegexpCC = '';

    // Sets the default smiley image path, fills the smiley table and derives the
    // lookup caches ($smileyRegexps, $smileyStarts, $smileyStartsRegexpCC) from it.
    //
    public function __construct()
    {
        $this->smileyPath = 'img/smileys/';
        $this->smileyImages = array(
            ':@' => array('file' => 'angry.png', 'width' => 11, 'height' => 11),
            ':-@' => array('file' => 'angry.png', 'width' => 11, 'height' => 11),
            '|(' => array('file' => 'annoyed.png', 'width' => 11, 'height' => 11),
            '|-(' => array('file' => 'annoyed.png', 'width' => 11, 'height' => 11),
            ':$' => array('file' => 'blush.png', 'width' => 11, 'height' => 11),
            ':-$' => array('file' => 'blush.png', 'width' => 11, 'height' => 11),
            'B)' => array('file' => 'cool.png', 'width' => 11, 'height' => 11),
            'B-)' => array('file' => 'cool.png', 'width' => 11, 'height' => 11),
            ';(' => array('file' => 'cry.png', 'width' => 11, 'height' => 11),
            ';-(' => array('file' => 'cry.png', 'width' => 11, 'height' => 11),
            ":'(" => array('file' => 'cry.png', 'width' => 11, 'height' => 11),
            ':D' => array('file' => 'grin.png', 'width' => 11, 'height' => 11),
            ':-D' => array('file' => 'grin.png', 'width' => 11, 'height' => 11),
            '^^' => array('file' => 'happy.png', 'width' => 11, 'height' => 11),
            '^_^' => array('file' => 'happy.png', 'width' => 11, 'height' => 11),
            '?(' => array('file' => 'huh.png', 'width' => 11, 'height' => 11),
            '?-(' => array('file' => 'huh.png', 'width' => 11, 'height' => 11),
            ':|' => array('file' => 'indifferent.png', 'width' => 11, 'height' => 11),
            ':-|' => array('file' => 'indifferent.png', 'width' => 11, 'height' => 11),
            ':p' => array('file' => 'razz.png', 'width' => 11, 'height' => 11),
            ':-p' => array('file' => 'razz.png', 'width' => 11, 'height' => 11),
            '8)' => array('file' => 'rolleyes.png', 'width' => 11, 'height' => 11),
            '8-)' => array('file' => 'rolleyes.png', 'width' => 11, 'height' => 11),
            ':(' => array('file' => 'sad.png', 'width' => 11, 'height' => 11),
            ':-(' => array('file' => 'sad.png', 'width' => 11, 'height' => 11),
            ':O' => array('file' => 'scared.png', 'width' => 11, 'height' => 11),
            ':-O' => array('file' => 'scared.png', 'width' => 11, 'height' => 11),
            ':/' => array('file' => 'sceptic.png', 'width' => 11, 'height' => 11),
            ':-/' => array('file' => 'sceptic.png', 'width' => 11, 'height' => 11),
            '8(' => array('file' => 'shocked.png', 'width' => 11, 'height' => 11),
            '8-(' => array('file' => 'shocked.png', 'width' => 11, 'height' => 11),
            ':)' => array('file' => 'smile.png', 'width' => 11, 'height' => 11),
            ':-)' => array('file' => 'smile.png', 'width' => 11, 'height' => 11),
            ';)' => array('file' => 'wink.png', 'width' => 11, 'height' => 11),
            ';-)' => array('file' => 'wink.png', 'width' => 11, 'height' => 11),
            '|O' => array('file' => 'yawn.png', 'width' => 11, 'height' => 11),
            '|-O' => array('file' => 'yawn.png', 'width' => 11, 'height' => 11),
        );

        // Prepare smiley working variables
        $this->smileyStarts = '';
        $this->smileyStartsRegexpCC = '';
        $escapedCodes = array();
        foreach ($this->smileyImages as $code => $image) {
            // Array keys that look like integers are stored as int; make sure we index a string.
            $code = (string) $code;

            // Collect all codes' first character for later fast evaluation.
            // Square brackets are used for the string offset: the curly-brace form
            // ($code{0}) is deprecated since PHP 7.4 and a parse error since PHP 8.0.
            $firstChar = $code[0];
            if (strpos($this->smileyStarts, $firstChar) === false) {
                $this->smileyStarts .= $firstChar;
                // Escape for use inside a regex character class.
                // The \ character must be replaced as the very first!
                $this->smileyStartsRegexpCC .= str_replace(
                    array('\\', '-', '[', ']', '_', '^'),
                    array('\\\\', '\-', '\[', '\]', '\_', '\^'),
                    $firstChar);
            }

            // Escape the whole code for use as one alternative of the combined pattern
            // ('_' is escaped too because it is used as the pattern delimiter elsewhere).
            // The \ character must be replaced as the very first!
            $escapedCodes[] = str_replace(
                array('\\', '$', '(', ')', '*', '+', '.', '?', '[', ']', '^', '_', '|'),
                array('\\\\', '\$', '\(', '\)', '\*', '\+', '\.', '\?', '\[', '\]', '\^', '\_', '\|'),
                $code);
        }
        // Alternatives are joined in table order. implode() is used instead of testing the
        // truthiness of the string built so far, which would drop the separator after a
        // leading alternative such as "0".
        $this->smileyRegexps = implode('|', $escapedCodes);
    }

    // Sets a new smiley image path.
//
    public function SetSmileyPath($path)
    {
        if (!is_string($path)) {
            throw new InvalidArgumentException('Invalid argument type: $path');
        }

        // The stored path always ends with a slash.
        $hasTrailingSlash = !(substr($path, -1) != '/');
        $this->smileyPath = $hasTrailingSlash ? $path : $path . '/';
    }

    // Sets a new header level offset.
    //
    // headerLevelOffset = (int) Offset, from 0 to 5
    //
    // Throws InvalidArgumentException if the argument is not an int or is out of range.
    //
    public function SetHeaderLevelOffset($headerLevelOffset)
    {
        if (!is_int($headerLevelOffset)) {
            throw new InvalidArgumentException('Invalid argument type: $headerLevelOffset');
        }

        $withinRange = ($headerLevelOffset >= 0 && $headerLevelOffset <= 5);
        if (!$withinRange) {
            throw new InvalidArgumentException('Argument out of range: $headerLevelOffset');
        }

        $this->headerLevelOffset = $headerLevelOffset;
    }

    // Resets the internal state.
    //
    // Clears the output buffer and all style, list, quote and table tracking fields.
    //
    private function ResetState()
    {
        // Output buffer
        $this->dst = '';

        // Inline/block style tracking
        $this->actualStyle = array();
        $this->desiredStyle = array();
        $this->styleStack = array();

        // List and block quote tracking
        $this->listType = '';
        $this->quoteLevel = 0;

        // Table tracking
        $this->tableLine = false;
        $this->tableColumns = 0;
        $this->maxTableColumns = 0;
    }

    // Converts a plain input into HTML output.
    //
    // src = (string) Plain input to convert
    //
    // Returns (string) generated HTML
    //
    public function Convert($src)
    {
        $this->ResetState();

        // Feed the source to the line processor, one line at a time,
        // with trailing whitespace removed from each line.
        foreach (explode("\n", $src) as $rawLine) {
            $this->ProcessLine(rtrim($rawLine));
        }

        // Make sure that all opened HTML tags are closed again.
        $this->ClearStyle(true);
        $this->ApplyStyle();

        // Take the result out before the state (including the buffer) is wiped again.
        $html = $this->dst;
        $this->ResetState();   // Clean up and save memory
        return $html;
    }

    // Post-processes a converted HTML string.
    //
    // Post-processing can be necessary to resolve generated entities to localised strings or
    // other dynamic data. The process of conversion has been split in two phases so that the
    // converted output from phase 1 (UnbMarkup::Convert() method) can be cached in a database
    // for improved page generation performance.
// // Returns (string) HTML ready for output // public function PostProcess($src) { // This is the place to replace session-dependent parts of the pre-translated HTML code // like text in a language specified by the session user. if (self::USE_UTF8) { // Replace ​ by Unicode U+200B (ZERO WIDTH SPACE) // Replace   by Unicode U+00A0 (NO-BREAK SPACE) $src = str_replace( array('​', ' '), array("\xE2\x80\x8C", "\xC2\xA0"), $src); } else { // Throw away ​ since it can only represented with Unicode // Replace   by ASCII A0h (NO-BREAK SPACE) $src = str_replace( array('​', ' '), array('', "\xA0"), $src); } return $src; } // Processes a complete line from the input. // // line = (string) Line content // inQuote = (bool) Value indicating whether the line is in a quote block // private function ProcessLine($line, $inQuote = false) { // Check captions (not in a paragraph or raw mode) if ($line{0} === '=' && preg_match('_^=(=+) (.*) =\1$_', $line, $m) && !$this->desiredStyle[self::STYLE_PAR] && !$this->desiredStyle[self::STYLE_RAW]) { $level = strlen($m[1]) + $this->headerLevelOffset; $caption = trim($m[2]); $level = min(6, $level); // Limit the level to a maximum of 6 $this->UpdateList(''); $this->UpdateTable(false); if (!$inQuote) $this->UpdateBlockQuote(0); $this->dst .= ''; $this->ProcessInline($caption, true); $this->ClearStyle(); $this->ApplyStyle(); $this->dst .= '' . "\n"; } // Check line beginning with spaces (not in a paragraph or raw mode) else if (($line{0} === "\t" || $line{0} === ' ') && preg_match('_^(?:(\t+)|( +))_', $line, $m) && !$this->desiredStyle[self::STYLE_PAR] && !$this->desiredStyle[self::STYLE_RAW]) { $level = strlen($m[1]); // One tab is one level if (!$level) $level = intval(round(strlen($m[2]) / 4.0)); // Four spaces is one level, rounded if ($this->listType != '') { $this->dst .= '
' . "\n"; } else { $this->UpdateTable(false); if (!$inQuote) $this->UpdateBlockQuote(0); $this->desiredStyle[self::STYLE_PAR] = $level; $this->ApplyStyle(); } $this->ProcessInline(ltrim($line)); } // Check line beginning with closing angles (blockquotes) (not in raw mode or when re-processing from a quotation) else if ($line{0} === '>' && preg_match('_^(>(?: ?>)*)(| (.*))$_', $line, $m) && !$this->desiredStyle[self::STYLE_RAW] && !$inQuote) { $level = strlen(str_replace(' ', '', $m[1])); $content = $m[3]; if ($level != $this->quoteLevel) { // Quote level has changed, clean up structure. if ($this->desiredStyle[self::STYLE_PAR]) { // New blockquotes must not be contained in a paragraph. $this->desiredStyle[self::STYLE_PAR] = false; $this->ApplyStyle(); } $this->UpdateList(''); $this->UpdateTable(false); } $this->UpdateBlockQuote($level); $this->ProcessLine($content, true); // Process the remainder of the line like a line of itself } // Check separator line (not in raw mode) else if ($line{0} === '-' && preg_match('_^----+$_', $line, $m) && !$this->desiredStyle[self::STYLE_RAW]) { $this->UpdateList(''); if (!$inQuote) $this->UpdateBlockQuote(0); $this->ClearStyle(); $this->ApplyStyle(); if ($this->tableLine && $this->maxTableColumns > 0) { $this->dst .= '' . "\n"; $this->dst .= '
' . "\n"; $this->dst .= '' . "\n"; } else { $this->dst .= '
' . "\n"; } } // Check list item (not in raw mode) else if (($line{0} === '#' || $line{0} === '*') && preg_match('_^([#*][#* \t]*[ \t])(.*)$_', $line, $m) && !$this->desiredStyle[self::STYLE_RAW]) { $type = str_replace(array("\t", ' '), array('', ''), $m[1]); $content = $m[2]; $this->UpdateTable(false); if (!$inQuote) $this->UpdateBlockQuote(0); if ($this->desiredStyle[self::STYLE_PAR]) { // Lists must not be contained in a paragraph. $this->desiredStyle[self::STYLE_PAR] = false; $this->ApplyStyle(); } $this->UpdateList($type); $this->ProcessInline($content); } // Check tables (not in raw mode) else if ($line{0} === '|' && preg_match('_^\|(.*)\|$_', $line, $m) && !$this->desiredStyle[self::STYLE_RAW]) { $content = $m[1]; $this->UpdateTable(true, $inQuote); $this->tableColumns = 1; // Initialise value $this->dst .= '' . "\n"; $this->dst .= ''; $this->ProcessInline($content); $this->dst .= '' . "\n"; $this->dst .= '' . "\n"; if ($this->tableColumns > $this->maxTableColumns) { $this->maxTableColumns = $this->tableColumns; } $this->tableColumns = 0; // Clean up value } // Check empty lines (not in raw mode) else if (!strlen($line) && !$this->desiredStyle[self::STYLE_RAW]) { // Close everything, especially a paragraph. // Multiple subsequent empty lines don't have any effect. $this->UpdateList(''); $this->UpdateTable(false); if (!$inQuote) $this->UpdateBlockQuote(0); $this->ClearStyle(); $this->ApplyStyle(); } else { $this->UpdateList(''); $this->UpdateTable(false); if (!$inQuote) $this->UpdateBlockQuote(0); if (!$this->desiredStyle[self::STYLE_PAR]) { // Every text must be contained in a paragraph. $this->desiredStyle[self::STYLE_PAR] = true; } $this->ProcessInline($line); if (!$this->actualStyle[self::STYLE_PAR]) { // There was no visible content in the line that would have triggered the // paragraph style. Reset it so that this line appears empty. 
$this->desiredStyle[self::STYLE_PAR] = false; } if ($this->desiredStyle[self::STYLE_RAW]) { // In raw mode, line endings are preserved. $this->dst .= '
'; } $this->dst .= "\n"; } } // Processes inline input. // // line = (string) Input // onlyBasic = (bool) Only process the very basic formatting, ignore other formatting markup // private function ProcessInline($line, $onlyBasic = false) { for ($i = 0; $i < strlen($line); $i++) { if (!$this->desiredStyle[self::STYLE_RAW]) { // If not in raw mode, we can skip to the next interesting character in one step. // This will jump over areas of the source text that are plain text and contain // no control codes. This saves about 90% of the execution time for processing // source texts with something like average control code ratio. // NOTE: Update this RegExp to add new control characters that are handled at other // places. if (preg_match("_['{}\[|<" . $this->smileyStartsRegexpCC . "0-9]|(?i)https?:|ftp:|www\.|mailto:_", $line, $m, PREG_OFFSET_CAPTURE, $i)) { $nextIndex = $m[0][1]; if ($nextIndex != $i) { // Only process the text until nextIndex if we're not already right there. $this->ApplyStyle(); $this->dst .= htmlspecialchars(substr($line, $i, $nextIndex - $i)); $i = $nextIndex; } } else { // No interesting character found, skip to the end of the line. $this->ApplyStyle(); $this->dst .= htmlspecialchars(substr($line, $i)); break; } } $unprocessed = false; if (!$this->desiredStyle[self::STYLE_RAW]) { // Following control codes are *not* valid in raw mode. 
// Check emphasising if ($line{$i} === "'" && preg_match("_\\G'('+)_", $line, $m, 0, $i)) { $level = strlen($m[1]); $level = min(3, $level); // Limit the level to a maximum of 3 if ($level & 1) $this->desiredStyle[self::STYLE_EM] = !$this->desiredStyle[self::STYLE_EM]; if ($level & 2) $this->desiredStyle[self::STYLE_STRONG] = !$this->desiredStyle[self::STYLE_STRONG]; $i += strlen($m[1]); } // Check raw mode (not in basic mode) else if (!$onlyBasic && $line{$i} === '{' && preg_match('_\G{{{_', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_RAW] = true; $i += 2; } // Check monospace (not in monospace mode) else if ($line{$i} === '{' && !$this->desiredStyle[self::STYLE_MONO] && preg_match('_\G{{_', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_MONO] = true; $i += 1; } // Check end of monospace (only in monospace mode) else if ($line{$i} === '}' && $this->desiredStyle[self::STYLE_MONO] && preg_match('_\G}}_', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_MONO] = false; $i += 1; } // Check anchor definition else if ($line{$i} === '[' && preg_match('_\G\[\[=(\S+?)\]\]_i', $line, $m, 0, $i)) { $anchor = trim($m[1]); $this->dst .= ''; $i += strlen($m[1]) + 5 - 1; } // Check hyperlink/image (not in basic mode) else if (!$onlyBasic && $line{$i} === '[' && preg_match('_\G\[\[(.*?)\]\]([0-9a-z]*)_i', $line, $m, 0, $i)) { $wordExtra = $m[2]; // Check link caption $parts = explode(' ', trim($m[1]), 2); $link = $parts[0]; $caption = trim($parts[1]); $this->ApplyStyle(); if (preg_match('_^(https?|ftp)://[-.0-9a-z]+/[^?]+\.(bmp|gif|ico|jp2|jpg|jpeg|png|svg)$_i', urldecode($link))) { // This is an image if (!strlen($caption)) $caption = $link; $this->dst .= '' . htmlspecialchars($caption) . 
''; $i += strlen($m[1]) + 4 - 1; } else { $resLink = $this->ResolveLink($link, $caption); if ($resLink) { $link = $resLink[0]; $caption = $resLink[1]; $useWordExtra = $resLink[2]; $rawCaption = $resLink[3]; if ($useWordExtra && $wordExtra != '') { $caption .= $wordExtra; } else { $wordExtra = ''; } $this->dst .= ''; if ($rawCaption) { // This would not support inline formatting in a link caption: #$this->dst .= htmlspecialchars($caption, ENT_NOQUOTES); $this->dst .= $caption; } else { // Backup and reset styles $backupActualStyle = $this->actualStyle; $backupDesiredStyle = $this->desiredStyle; $backupStyleStack = $this->styleStack; $this->actualStyle = array(); $this->desiredStyle = array(); $this->styleStack = array(); $this->ProcessInline($caption, true); $this->ClearStyle(); $this->ApplyStyle(); // Restore styles $this->actualStyle = $backupActualStyle; $this->desiredStyle = $backupDesiredStyle; $this->styleStack = $backupStyleStack; } $this->dst .= ''; $i += strlen($m[1]) + 4 - 1 + strlen($wordExtra); } else { // We were signalled that the link is invalid and should not be made clickable. $this->dst .= '['; // Don't drop the current character when we don't process it } } } // Check table column delimiter (only in table mode, not in basic mode) else if (!$onlyBasic && $line{$i} === '|' && $this->tableLine /*&& preg_match('_\G\|_', $line, $m, 0, $i)*/) { $this->ClearStyle(); $this->ApplyStyle(); $this->tableColumns++; $this->ApplyStyle(); $this->dst .= '' . "\n" . 
''; } // Check left-aligned floating box (not in left/right-align or basic mode) else if (!$onlyBasic && $line{$i} === '<' && !$this->desiredStyle[self::STYLE_LEFT] && !$this->desiredStyle[self::STYLE_RIGHT] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_LEFT] = true; $i += strlen($m[1]) - 1; } // Check end of left-aligned floating box (only in left-align mode) else if ($line{$i} === '<' && $this->desiredStyle[self::STYLE_LEFT] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->ClearStyle(); $this->desiredStyle[self::STYLE_LEFT] = false; $i += strlen($m[1]) - 1; } // Check right-aligned floating box (not in left/right-align or basic mode) else if (!$onlyBasic && $line{$i} === '<' && !$this->desiredStyle[self::STYLE_LEFT] && !$this->desiredStyle[self::STYLE_RIGHT] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_RIGHT] = true; $i += strlen($m[1]) - 1; } // Check end of right-aligned floating box (only in right-align mode) else if ($line{$i} === '<' && $this->desiredStyle[self::STYLE_RIGHT] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->ClearStyle(); $this->desiredStyle[self::STYLE_RIGHT] = false; $i += strlen($m[1]) - 1; } // Check centre-aligned box (not in centre-align or basic mode) else if (!$onlyBasic && $line{$i} === '<' && !$this->desiredStyle[self::STYLE_CENTRE] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_CENTRE] = true; $i += strlen($m[1]) - 1; } // Check end of centre-aligned floating box (only in centre-align mode) else if ($line{$i} === '<' && $this->desiredStyle[self::STYLE_CENTRE] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->ClearStyle(); $this->desiredStyle[self::STYLE_CENTRE] = false; $i += strlen($m[1]) - 1; } // Check bordered box (not in box or basic mode) else if (!$onlyBasic && $line{$i} === '<' && !$this->desiredStyle[self::STYLE_BOX] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_BOX] = true; $i += 
strlen($m[1]) - 1; } // Check end of bordered box (only in box mode) else if ($line{$i} === '<' && $this->desiredStyle[self::STYLE_BOX] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->ClearStyle(); $this->desiredStyle[self::STYLE_BOX] = false; $i += strlen($m[1]) - 1; } // Check off topic (not in off-topic or basic mode) else if (!$onlyBasic && $line{$i} === '<' && !$this->desiredStyle[self::STYLE_OT] && preg_match('_\G(<(?:ot|offtopic)>)_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_OT] = true; $i += strlen($m[1]) - 1; } // Check end of off topic (only in off-topic mode) else if ($line{$i} === '<' && $this->desiredStyle[self::STYLE_OT] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->ClearStyle(); $this->desiredStyle[self::STYLE_OT] = false; $i += strlen($m[1]) - 1; } // Check text colour (not in colour or basic mode) else if (!$onlyBasic && $line{$i} === '<' && !$this->desiredStyle[self::STYLE_COLOUR] && preg_match('_\G()_i', $line, $m, 0, $i)) { $colour = $m[2]; $this->desiredStyle[self::STYLE_COLOUR] = $colour; $i += strlen($m[1]) - 1; } // Check end of text colour (only in colour mode) else if ($line{$i} === '<' && $this->desiredStyle[self::STYLE_COLOUR] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_COLOUR] = false; $i += strlen($m[1]) - 1; } // Check background colour (not in back-colour or basic mode) else if (!$onlyBasic && $line{$i} === '<' && !$this->desiredStyle[self::STYLE_BACKC] && preg_match('_\G()_i', $line, $m, 0, $i)) { $colour = $m[2]; $this->desiredStyle[self::STYLE_BACKC] = $colour; $i += strlen($m[1]) - 1; } // Check end of background colour (only in back-colour mode) else if ($line{$i} === '<' && $this->desiredStyle[self::STYLE_BACKC] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_BACKC] = false; $i += strlen($m[1]) - 1; } // Check text size (not in size or basic mode) else if (!$onlyBasic && $line{$i} === '<' && !$this->desiredStyle[self::STYLE_SIZE] && 
preg_match('_\G()_i', $line, $m, 0, $i)) { $size = intval($m[2]) / 10.0; $this->desiredStyle[self::STYLE_SIZE] = $size; $i += strlen($m[1]) - 1; } // Check end of text size (only in size mode) else if ($line{$i} === '<' && $this->desiredStyle[self::STYLE_SIZE] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_SIZE] = false; $i += strlen($m[1]) - 1; } // Check bigger font (not in big mode) else if ($line{$i} === '<' && !$this->desiredStyle[self::STYLE_BIG] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_BIG] = true; $i += strlen($m[1]) - 1; } // Check end of bigger font (only in big mode) else if ($line{$i} === '<' && $this->desiredStyle[self::STYLE_BIG] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_BIG] = false; $i += strlen($m[1]) - 1; } // Check smaller font (not in small mode) else if ($line{$i} === '<' && !$this->desiredStyle[self::STYLE_SMALL] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_SMALL] = true; $i += strlen($m[1]) - 1; } // Check end of smaller font (only in small mode) else if ($line{$i} === '<' && $this->desiredStyle[self::STYLE_SMALL] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_SMALL] = false; $i += strlen($m[1]) - 1; } // Check underline (not in underline mode) else if ($line{$i} === '<' && !$this->desiredStyle[self::STYLE_ULINE] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_ULINE] = true; $i += strlen($m[1]) - 1; } // Check end of underline (only in underline mode) else if ($line{$i} === '<' && $this->desiredStyle[self::STYLE_ULINE] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_ULINE] = false; $i += strlen($m[1]) - 1; } // Check strike-through (not in strike-through mode) else if ($line{$i} === '<' && !$this->desiredStyle[self::STYLE_STRIKE] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_STRIKE] = 
true; $i += strlen($m[1]) - 1; } // Check end of strike-through (only in strike-through mode) else if ($line{$i} === '<' && $this->desiredStyle[self::STYLE_STRIKE] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_STRIKE] = false; $i += strlen($m[1]) - 1; } // Check subscript (not in subscript or basic mode) else if (!$onlyBasic && $line{$i} === '<' && !$this->desiredStyle[self::STYLE_SUB] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_SUB] = true; $i += strlen($m[1]) - 1; } // Check end of subscript (only in subscript mode) else if ($line{$i} === '<' && $this->desiredStyle[self::STYLE_SUB] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_SUB] = false; $i += strlen($m[1]) - 1; } // Check superscript (not in superscript or basic mode) else if (!$onlyBasic && $line{$i} === '<' && !$this->desiredStyle[self::STYLE_SUP] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_SUP] = true; $i += strlen($m[1]) - 1; } // Check end of superscript (only in superscript mode) else if ($line{$i} === '<' && $this->desiredStyle[self::STYLE_SUP] && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->desiredStyle[self::STYLE_SUP] = false; $i += strlen($m[1]) - 1; } // Check manual line break else if ($line{$i} === '<' && preg_match('_\G()_i', $line, $m, 0, $i)) { $this->ApplyStyle(); $this->dst .= '
'; $i += strlen($m[1]) - 1; } // Check URL (not in basic mode) else if (!$onlyBasic && (strpos('hfwHFW', $line{$i}) !== false) && preg_match('_\G(?<=[ \t]|^) ( ( (https?|ftp):// ([-$%+,.0-9=a-z\_~]*(:[-$%+,.0-9=a-z\_~]*)?@)? | www\. ) [0-9a-z]([-.0-9a-z]*[0-9a-z])? (/[-!#$%&()+,./0-9:;=?@a-z\[\]\_~]*)? ) (?=[ \t!"),.:;?]|$) _ix', $line, $m, 0, $i)) { // Exclude matched ending characters from the link $m[1] = preg_replace('_[!"),.:;?]$_', '', $m[1]); $link = $m[1]; $caption = htmlspecialchars($link, ENT_NOQUOTES); // Add ​ after each [/?&] to allow wrapping long URLs (works on HTML code already) $caption = preg_replace('_(/(?!/)|\?|&)_', '\1​', $caption); // Add protocol to the link if missing if (!preg_match('_^(https?|ftp):_i', $link)) { $link = 'http://' . $link; } $this->ApplyStyle(); $this->dst .= '' . $caption . ''; $i += strlen($m[1]) - 1; } // Check mailto URL (not in basic mode) else if (!$onlyBasic && (strpos('mM', $line{$i}) !== false) && preg_match('_\G(?<=[ \t]|^) ( mailto: [!#$%&()*+,\-.0-9:;=?a-z\[\]^\_~]+ @ [0-9a-z]([-.0-9a-z]*[0-9a-z])? ( \?(to|cc|bcc|subject|body)=[!-%\'->@-~]* (&(to|cc|bcc|subject|body)=[!-%\'->@-~]*)* )? ) (?=[ \t!"),.:;?]|$) _ix', $line, $m, 0, $i)) { // Exclude matched ending characters from the link $m[1] = preg_replace('_[!"),.:;?]$_', '', $m[1]); $link = $m[1]; $caption = htmlspecialchars($link, ENT_NOQUOTES); // Add ​ after each [.@?&]|%20|%0A to allow wrapping long URLs (works on HTML code already) $caption = preg_replace('_(mailto:|\.|@|\?|&|%20|%0A)_i', '\1​', $caption); $this->ApplyStyle(); $this->dst .= '' . $caption . 
''; $i += strlen($m[1]) - 1; } // Check smiley else if (($code = $this->CheckSmiley($line, $i))) { $this->ApplyStyle(); $this->TranslateSmiley($code); $i += strlen($code) - 1; } // Non-wrapping numbers else if ((strpos('0123456789', $line{$i}) !== false) && preg_match('_([0-9]+([., ][0-9]+)* )[^0-9., ]_', $line, $m, 0, $i)) { $s = htmlspecialchars($m[1], ENT_NOQUOTES); // Do not prevent any wrapping if the found number is very long if (strlen($m[1]) <= 12) { // Replace all spaces by non-breaking spaces (works on HTML code already) $s = str_replace(' ', ' ', $s); } $this->ApplyStyle(); $this->dst .= $s; $i += strlen($m[1]) - 1; } else { $unprocessed = true; } } else // if ($this->desiredStyle[self::STYLE_RAW]) { // Following control codes are *only* valid in raw mode. // Check end of raw mode (only in raw mode) if ($line{$i} == '}' && preg_match('_\G}}}(}*)_', $line, $m, 0, $i)) { $add = strlen($m[1]); if ($add == 0) { $this->desiredStyle[self::STYLE_RAW] = false; $i += 2; } else { $this->ApplyStyle(); $this->dst .= '}}}' . str_repeat('}', $add - 1); $i += 2 + $add; } } else { $unprocessed = true; } } if ($unprocessed) { // This character was not processed by any of the above conditional blocks. $this->ApplyStyle(); if ($this->desiredStyle[self::STYLE_RAW] && $line{$i} == "\t") { $this->dst .= '    '; } else if ($this->desiredStyle[self::STYLE_RAW] && $line{$i} == ' ' && ($i == 0 || $line{$i - 1} == ' ' && substr($this->dst, -1) == ' ')) { $this->dst .= ' '; } else { $this->dst .= htmlspecialchars($line{$i}, ENT_NOQUOTES); } } } } // Clear current styles. // // This is used at the end of blocks (e.g. paragraphs) to end all active inline formatting. // // all = (bool) Set to {{true}} only at the global end of the input to close all block styles, too. 
//
    private function ClearStyle($all = false)
    {
        if ($all) {
            $this->desiredStyle = array();
            return;
        }

        // Some styles should not be cleared: carry their current desired value over.
        // A style that has never been set is read as null through isset() rather than by
        // indexing a missing key, which raises an "undefined index/array key" notice or
        // warning (the desired-style array starts out empty).
        $keptStyleIds = array(
            self::STYLE_LEFT,
            self::STYLE_RIGHT,
            self::STYLE_CENTRE,
            self::STYLE_BOX,
            self::STYLE_OT,
            self::STYLE_RAW,
        );
        $keepStyle = array();
        foreach ($keptStyleIds as $styleId) {
            $keepStyle[$styleId] = isset($this->desiredStyle[$styleId]) ? $this->desiredStyle[$styleId] : null;
        }
        $this->desiredStyle = $keepStyle;
    }

    // Applies the desired style and writes all necessary HTML output.
    //
    // This matches $desiredStyle with $actualStyle and does everything necessary so that after
    // this method call, the desired style is active in the HTML output.
    //
    // Styles that were never set are treated as null (i.e. inactive) instead of being read
    // from a missing array key.
    //
    private function ApplyStyle()
    {
        // Step 1:
        // Walk through the actual styles' stack bottom-up and find the first item to be closed.
        $closeFromStackPos = -1;
        $stackSize = count($this->styleStack);
        for ($x = 0; $x < $stackSize; $x++) {
            $styleId = $this->styleStack[$x];
            $actual = isset($this->actualStyle[$styleId]) ? $this->actualStyle[$styleId] : null;
            $desired = isset($this->desiredStyle[$styleId]) ? $this->desiredStyle[$styleId] : null;
            // Loose comparison on purpose: null, false and other empty values all mean "off".
            if ($actual != $desired) {
                $closeFromStackPos = $x;
                break;
            }
        }

        // From this one on, every item is closed top-down.
        if ($closeFromStackPos >= 0) {
            while (count($this->styleStack) > $closeFromStackPos) {
                $styleId = array_pop($this->styleStack);
                $data = isset($this->desiredStyle[$styleId]) ? $this->desiredStyle[$styleId] : null;
                $this->dst .= $this->TranslateStyle($styleId, false, $data);
                $this->actualStyle[$styleId] = false;
            }
        }

        // Step 2:
        // Find every item to be opened (which now includes the ones that were closed for
        // correct HTML tag nesting).
        for ($styleId = 0; $styleId <= self::STYLE_ID_MAX; $styleId++) {
            $desired = isset($this->desiredStyle[$styleId]) ? $this->desiredStyle[$styleId] : null;
            $actual = isset($this->actualStyle[$styleId]) ? $this->actualStyle[$styleId] : null;
            if ($desired && !$actual) {
                $this->dst .= $this->TranslateStyle($styleId, true, $desired);
                // Remember the style data that is now active and that the style is open.
                $this->actualStyle[$styleId] = $desired;
                $this->styleStack[] = $styleId;
            }
        }
    }

    // Translates style IDs into HTML.
// // styleId = (int) See UnbMarkup::STYLE_* constants // openOrClose = (bool) {{true}} to open the style, {{false}} to close it // data = Optional style-specific data // private function TranslateStyle($styleId, $openOrClose, $data = null) { switch ($styleId) { case self::STYLE_LEFT: return $openOrClose ? '
' : '
'; case self::STYLE_RIGHT: return $openOrClose ? '
' : '
'; case self::STYLE_CENTRE: return $openOrClose ? '
' : '
'; case self::STYLE_BOX: return $openOrClose ? '
' : '
'; case self::STYLE_OT: return $openOrClose ? '
' . $this->TranslateText('off topic') . '
' : '
'; case self::STYLE_PAR: if ($openOrClose) { if (is_int($data) && $data > 0) { // Indentation level return '

'; } else { return '

'; } } else { return '

';
				}
			case self::STYLE_EM:
				return $openOrClose ? '' : '';
			case self::STYLE_STRONG:
				return $openOrClose ? '' : '';
			case self::STYLE_MONO:
				return $openOrClose ? '' : '';
			case self::STYLE_COLOUR:
				if ($openOrClose)
				{
					if (is_string($data) && $data != '')
					{
						// Text colour
						return '';
					}
					else
					{
						// Quite useless... should never happen...
						return '';
					}
				}
				else
				{
					return '';
				}
			case self::STYLE_BACKC:
				if ($openOrClose)
				{
					if (is_string($data) && $data != '')
					{
						// Background colour
						return '';
					}
					else
					{
						// Quite useless... should never happen...
						return '';
					}
				}
				else
				{
					return '';
				}
			case self::STYLE_SIZE:
				if ($openOrClose)
				{
					if (is_float($data) && $data > 0)
					{
						// Text size
						return '';
					}
					else
					{
						// Quite useless... should never happen...
						return '';
					}
				}
				else
				{
					return '';
				}
			case self::STYLE_BIG:
				return $openOrClose ? '' : '';
			case self::STYLE_SMALL:
				return $openOrClose ? '' : '';
			case self::STYLE_ULINE:
				return $openOrClose ? '' : '';
			case self::STYLE_STRIKE:
				return $openOrClose ? '' : '';
			case self::STYLE_SUB:
				return $openOrClose ? '' : '';
			case self::STYLE_SUP:
				return $openOrClose ? '' : '';
			case self::STYLE_RAW:
				return $openOrClose ? '' : '';
		}
	}

	// Determines whether there's a smiley code at a given position.
	//
	// A code only counts if it is preceded by a space, a tab or the start of the line, and
	// followed by a space, a tab, one of !"),.:;? or the end of the line.
	//
	// line = (string) Line to analyse
	// i = (int) Position in the line, where to look for a smiley code
	//
	// Returns (string) smiley code that was found, or {{false}} otherwise
	//
	private function CheckSmiley($line, $i)
	{
		// A position outside of the line can never start a smiley. Checking this first also
		// avoids reading an undefined string offset.
		if (!isset($line[$i])) return false;

		// For efficiency, first check whether the current character can be the beginning
		// of a smiley at all. Only then call the regular expression stuff.
		// Note: String offsets use [] here; the {} form is a parse error as of PHP 8.
		if (strpos($this->smileyStarts, $line[$i]) !== false &&
			preg_match('_\G(?<=[ \t]|^)(' . $this->smileyRegexps . ')(?=[ \t!"),.:;?]|$)_', $line, $m, 0, $i))
		{
			return $m[1];
		}

		// We haven't found a smiley here.
		return false;
	}

	// Translates a smiley code into HTML and appends it to the output.
	//
	// code = (string) Smiley code
	//
	private function TranslateSmiley($code)
	{
		// Look the code up defensively so that an unknown code goes to the fallback branch
		// without raising an undefined index notice.
		$image = isset($this->smileyImages[$code]) ? $this->smileyImages[$code] : null;
		if ($image)
		{
			// NOTE(review): Both literals around the escaped code are empty here and $image is
			// not used any further. Presumably the image markup belongs into them - confirm
			// against the pristine file.
			$this->dst .= '' .
				htmlspecialchars($code) . '';
		}
		else
		{
			// Should never happen...
			$this->dst .= htmlspecialchars($code, ENT_NOQUOTES);
		}
	}

	// Applies the new list type.
	//
	// This works similar to ApplyStyle by closing all open list items to get the new desired list
	// type opened for further input.
	//
	// The output literals are reproduced exactly as they were found, including the line breaks
	// and list markers inside of them.
	// NOTE(review): They look like rendered list markup rather than tag text - confirm against
	// the pristine file.
	//
	// type = (string) New list type (string of the * and # symbols from the beginning of a line)
	//
	private function UpdateList($type)
	{
		// Find the highest-level difference from the list mode of the previous line, that is the
		// character index at which both type strings differ first:
		//   -1   both strings are equal
		//   >= 0 index of the first different character, or the length of the shorter string
		//        if one string is a prefix of the other
		$oldLen = strlen($this->listType);
		$newLen = strlen($type);
		$commonLen = min($oldLen, $newLen);

		$firstDiff = -2;
		for ($i = 0; $i < $commonLen; $i++)
		{
			if ($this->listType[$i] != $type[$i])
			{
				$firstDiff = $i;
				break;
			}
		}
		if ($firstDiff == -2)
		{
			if ($oldLen == $newLen)
				$firstDiff = -1;
			else
				$firstDiff = $commonLen;
		}

		if ($type != '' && $firstDiff == -1)
		{
			// We're in a list and this item has the same mode as the previous line: no change, new list item.
			$this->ClearStyle();
			$this->ApplyStyle();
			$this->dst .= '' . "\n" . '
  • ';
		}
		else if ($firstDiff >= 0)
		{
			// There's a new list mode.

			// Step 1:
			// Close all open lists down to the common basis.
			$origLastListType = $this->listType;
			while (strlen($this->listType) > $firstDiff)
			{
				$lastMode = substr($this->listType, -1);
				if ($lastMode == '*')
				{
					$this->ClearStyle();
					$this->ApplyStyle();
					$this->dst .= '
  • ' . "\n" . '' . "\n";
				}
				else if ($lastMode == '#')
				{
					$this->ClearStyle();
					$this->ApplyStyle();
					$this->dst .= '' . "\n" . '' . "\n";
				}
				$this->listType = substr($this->listType, 0, -1);
			}

			if ($newLen > 0 && $newLen < strlen($origLastListType))
			{
				// We're back on a higher level. Don't touch the previous current-level item again
				// when returning from a nested list, so we close list items for one more level.
				$this->ClearStyle();
				$this->ApplyStyle();
				$this->dst .= '' . "\n" . '
  • ';
			}

			// Step 2:
			// Open new lists.
			for ($i = $firstDiff; $i < $newLen; $i++)
			{
				$newMode = $type[$i];
				if ($newMode == '*')
				{
					$this->dst .= '
      ' . "\n" . '
    • ';
				}
				else if ($newMode == '#')
				{
					$this->dst .= '
        ' . "\n" . '
      1. ';
				}
				$this->listType .= $newMode;
			}
		}
	}

	// Applies the new block quote level.
	//
	// This works similar to ApplyStyle by closing all open block quotes to get the new desired
	// quote level opened for further input. Nothing happens if the level is unchanged.
	//
	// level = (int) New quote level (0 is no quote)
	//
	private function UpdateBlockQuote($level)
	{
		if ($level > $this->quoteLevel)
		{
			// The level has increased (possibly from no quotation at all).
			// Add the new levels now.
			$this->dst .= str_repeat('
        ' . "\n", $level - $this->quoteLevel);
			$this->quoteLevel = $level;
		}
		else if ($level < $this->quoteLevel)
		{
			// The level has decreased (possibly down to zero).
			// Reset the text style first, then remove the levels.
			$this->ClearStyle();
			$this->ApplyStyle();
			$this->dst .= str_repeat('
        ' . "\n", $this->quoteLevel - $level);
			$this->quoteLevel = $level;
		}
	}

	// Applies the new table state.
	//
	// Nothing happens if the requested state is the current state already.
	//
	// active = (bool) {{true}} to open a table, {{false}} to close it
	// inQuote = (bool) {{true}} if in a quote block, {{false}} otherwise
	//
	private function UpdateTable($active, $inQuote = false)
	{
		if ($active && !$this->tableLine)
		{
			// New table started.
			// Outside of a quote block, close all open block quotes first.
			if (!$inQuote) $this->UpdateBlockQuote(0);
			if ($this->desiredStyle[self::STYLE_PAR])
			{
				// Tables must not be contained in a paragraph.
				$this->desiredStyle[self::STYLE_PAR] = false;
				$this->ApplyStyle();
			}
			$this->maxTableColumns = 0;   // Initialise value
			$this->dst .= '' . "\n";
			$this->tableLine = true;
		}
		else if (!$active && $this->tableLine)
		{
			// Now outside of a table.
			$this->dst .= '
        ' . "\n";
			$this->maxTableColumns = 0;   // Clean up value
			$this->tableLine = false;
		}
	}

	// Resolves a link specification into a URL.
	//
	// The following forms are recognised, in this order: url:, file:, image:, google:,
	// wikipedia:, generic https?/ftp/mailto/xmpp links, and links without a namespace, which
	// refer to other Wiki pages. Any other namespace is rejected.
	//
	// link = (string) Link from markup
	// caption = (string) Caption from markup, if available
	//
	// Returns (array(link, caption, useWordExtra, rawCaption)) if the link was recognised,
	// optionally with a third item that tells whether the link shall be expanded to the entire
	// word it was found in and a fourth item that determines whether the caption shall be used
	// as uninterpreted raw (HTML) output, like for images.
	// {{false}} if the link is invalid and shall be left unchanged in the output.
	//
	private function ResolveLink($link, $caption)
	{
		// Check for forced links (like linking to an image without automatically inlining it)
		if (preg_match('_^url:(.*)$_i', $link, $m))
		{
			if (!strlen($caption)) $caption = $m[1];
			return array($m[1], $caption);
		}

		// Check for an uploaded file by its name
		if (preg_match('_^file:(.*)$_i', $link, $m))
		{
			if (!strlen($caption)) $caption = $this->TranslateText('file') . ' ' . $m[1];
			// Resolve uploaded file name to a clickable URL for the browser
			if (is_numeric($m[1]))
				$link = 'get_data.php?messageid=' . $m[1];
			else
				$link = 'get_data.php?pagename=' . urlencode($m[1]);
			// TODO: This link is application dependent!
			return array($link, $caption);
		}

		// Check for an uploaded image by its name
		if (preg_match('_^image:(.*)$_i', $link, $m))
		{
			// Resolve uploaded image file name to HTML code to display the image
			$link = $m[1];
			$size = 0;
			if (preg_match('_(:([0-9]+))$_i', $m[1], $m2))
			{
				$link = substr($link, 0, -strlen($m2[1]));   // Cut off parameters from the link
				$size = intval($m2[2]);
			}
			if (is_numeric($link))
			{
				$imageFile = 'get_data.php?messageid=' . $link . '&forimg=1';
				$link = 'view_message.php?messageid=' . $link;
			}
			else
			{
				$imageFile = 'get_data.php?pagename=' . urlencode($link) . '&forimg=1';
				$link = 'view_message.php?pagename=' . urlencode($link);
			}
			// TODO: This link is application dependent!

			if (strlen($caption)) $caption2 = $caption;   // Only use caption2 if a caption is set, don't use automatic value in caption2
			// NOTE(review): $link has already been rewritten to the view URL at this point, so
			// the automatic caption contains that URL and not the image name (the file: branch
			// uses $m[1] instead). Left unchanged - confirm what is intended.
			if (!strlen($caption)) $caption = $this->TranslateText('image') . ' ' . $link;

			// NOTE(review): The statement building $html arrived damaged. Whatever stood between
			// the caption and a dangling "0)" is missing, including the start of the size
			// condition. "if ($size > 0)" is restored because the dangling "0)" and the else
			// branch leave no other reading; the lost literals are left empty. $imageFile and
			// $caption2 are presumably used by the missing part - restore it from the pristine
			// file before relying on this branch.
			$html = '' . htmlspecialchars($caption) . '';
			if ($size > 0)
				$html .= 'style="max-width: ' . $size . 'px; max-height: ' . $size . 'px;" ';
			else
				$html .= 'style="max-width: 100%;" ';
			$html .= '/>';
			return array($link, $html, false, true);
		}

		// Check for a link to Google
		if (preg_match('_^google:(.*)$_i', $link, $m))
		{
			$useWordExtra = !strlen($caption);
			if (!strlen($caption)) $caption = str_replace('_', ' ', $m[1]) . ' ' . $this->TranslateText('google web search');
			$link = $this->TranslateText('google web search link');
			$link = str_replace('{q}', urlencode(str_replace('_', ' ', $m[1])), $link);
			return array($link, $caption, $useWordExtra);
		}

		// Check for a link to Wikipedia
		if (preg_match('_^wikipedia:(.*)$_i', $link, $m))
		{
			$useWordExtra = !strlen($caption);
			if (!strlen($caption)) $caption = str_replace('_', ' ', $m[1]) . ' ' . $this->TranslateText('wikipedia');
			$link = $this->TranslateText('wikipedia link');
			$link = str_replace('{q}', urlencode(str_replace('_', ' ', $m[1])), $link);
			return array($link, $caption, $useWordExtra);
		}

		// Check for generic link
		if (preg_match('_^(https?|ftp|mailto|xmpp):_i', $link))
		{
			if (!strlen($caption)) $caption = $link;
			return array($link, $caption);
		}

		// Check for namespace
		if (preg_match('_^[-0-9a-z\_]*:_i', $link))
		{
			// We don't currently support other namespaces, so ignore the link
			return false;
		}

		// Everything else is links to other Wiki pages with no protocol/namespace set.
		$useWordExtra = false;
		if (!strlen($caption))
		{
			$caption = $link;
			// substr() instead of a string offset: it is also safe for an empty link, and the
			// {} offset form is a parse error as of PHP 8.
			if (substr($link, 0, 1) != '#')
			{
				// Word expansion is not used for Anchor links on the same page - TODO: Why?
				$useWordExtra = true;
			}
		}
		$link = 'view_message.php?pagename=' . urlencode($link);
		// TODO: This link is application dependent!
		return array($link, $caption, $useWordExtra);
	}

	// Gets the text translation for a specified language.
	//
	// This function calls UnbLocale::Translate if that class is defined. It uses a static internal
	// set of words in English language otherwise, also if UnbLocale has no value for the key.
	//
	// key = (string) Translation text key.
	//
	// Returns (string) Translated string. A key that is unknown to both sources is returned
	// unchanged.
	//
	private function TranslateText($key)
	{
		$str = null;
		if (class_exists('UnbLocale')) $str = UnbLocale::Translate('markup:' . $key);
		if (isset($str)) return $str;

		switch ($key)
		{
			case 'off topic': return 'Off Topic:';
			case 'file': return 'File:';
			case 'image': return 'Image:';
			case 'google web search': return '(Google Web Search)';
			// NOTE(review): Both link templates consist of the {q} placeholder only. Presumably
			// the URL around it is missing - confirm against the pristine file.
			case 'google web search link': return '{q}';
			case 'wikipedia': return '(Wikipedia)';
			case 'wikipedia link': return '{q}';
			// Always return a string as documented, instead of falling off the switch with null.
			default: return $key;
		}
	}

	// Gets the quoted representation of another content.
	//
	// Lines that begin with > get another > prepended, empty lines become a single >, and all
	// other lines are prefixed with "> ". An empty text is returned unchanged.
	//
	// str = (string) Text to quote.
	//
	// Returns (string) Quoted string, for use in replies.
	//
	public static function GetQuotedText($str)
	{
		if (strlen($str))
		{
			// The order matters: the last pattern skips every line that begins with > by then.
			$str = preg_replace('_^>_m', '>>', $str);
			$str = preg_replace('_^$_m', '>', $str);
			$str = preg_replace('_^(?!>|$)_m', '> ', $str);
		}
		return $str;
	}
} // class UnbMarkup
?>