array('file' => image filename, 'width'/'height' => image dimensions)
// Smiley table, filled in by the constructor: smiley code => image description array
private $smileyImages = array();
// Cached regular expression alternation ("a|b|c", without delimiters) that matches every
// smiley code; built by the constructor and used by CheckSmiley()
private $smileyRegexps = '';
// Cache of all characters that a smiley code can start with, concatenated into one string
private $smileyStarts = '';
// Like $smileyStarts, but escaped for use inside a regex character class
private $smileyStartsRegexpCC = '';
// Initialises the default smiley image path and the smiley table, then derives the cached
// lookup data from that table:
// - $smileyStarts: every distinct first character of a smiley code
// - $smileyStartsRegexpCC: the same characters, escaped for a regex character class
// - $smileyRegexps: alternation of all escaped smiley codes
//
public function __construct()
{
    $this->smileyPath = 'img/smileys/';
    $this->smileyImages = array(
        ':@' => array('file' => 'angry.png', 'width' => 11, 'height' => 11),
        ':-@' => array('file' => 'angry.png', 'width' => 11, 'height' => 11),
        '|(' => array('file' => 'annoyed.png', 'width' => 11, 'height' => 11),
        '|-(' => array('file' => 'annoyed.png', 'width' => 11, 'height' => 11),
        ':$' => array('file' => 'blush.png', 'width' => 11, 'height' => 11),
        ':-$' => array('file' => 'blush.png', 'width' => 11, 'height' => 11),
        'B)' => array('file' => 'cool.png', 'width' => 11, 'height' => 11),
        'B-)' => array('file' => 'cool.png', 'width' => 11, 'height' => 11),
        ';(' => array('file' => 'cry.png', 'width' => 11, 'height' => 11),
        ';-(' => array('file' => 'cry.png', 'width' => 11, 'height' => 11),
        ":'(" => array('file' => 'cry.png', 'width' => 11, 'height' => 11),
        ':D' => array('file' => 'grin.png', 'width' => 11, 'height' => 11),
        ':-D' => array('file' => 'grin.png', 'width' => 11, 'height' => 11),
        '^^' => array('file' => 'happy.png', 'width' => 11, 'height' => 11),
        '^_^' => array('file' => 'happy.png', 'width' => 11, 'height' => 11),
        '?(' => array('file' => 'huh.png', 'width' => 11, 'height' => 11),
        '?-(' => array('file' => 'huh.png', 'width' => 11, 'height' => 11),
        ':|' => array('file' => 'indifferent.png', 'width' => 11, 'height' => 11),
        ':-|' => array('file' => 'indifferent.png', 'width' => 11, 'height' => 11),
        ':p' => array('file' => 'razz.png', 'width' => 11, 'height' => 11),
        ':-p' => array('file' => 'razz.png', 'width' => 11, 'height' => 11),
        '8)' => array('file' => 'rolleyes.png', 'width' => 11, 'height' => 11),
        '8-)' => array('file' => 'rolleyes.png', 'width' => 11, 'height' => 11),
        ':(' => array('file' => 'sad.png', 'width' => 11, 'height' => 11),
        ':-(' => array('file' => 'sad.png', 'width' => 11, 'height' => 11),
        ':O' => array('file' => 'scared.png', 'width' => 11, 'height' => 11),
        ':-O' => array('file' => 'scared.png', 'width' => 11, 'height' => 11),
        ':/' => array('file' => 'sceptic.png', 'width' => 11, 'height' => 11),
        ':-/' => array('file' => 'sceptic.png', 'width' => 11, 'height' => 11),
        '8(' => array('file' => 'shocked.png', 'width' => 11, 'height' => 11),
        '8-(' => array('file' => 'shocked.png', 'width' => 11, 'height' => 11),
        ':)' => array('file' => 'smile.png', 'width' => 11, 'height' => 11),
        ':-)' => array('file' => 'smile.png', 'width' => 11, 'height' => 11),
        ';)' => array('file' => 'wink.png', 'width' => 11, 'height' => 11),
        ';-)' => array('file' => 'wink.png', 'width' => 11, 'height' => 11),
        '|O' => array('file' => 'yawn.png', 'width' => 11, 'height' => 11),
        '|-O' => array('file' => 'yawn.png', 'width' => 11, 'height' => 11),
    );

    // Prepare smiley working variables
    $this->smileyRegexps = '';
    $this->smileyStarts = '';
    $this->smileyStartsRegexpCC = '';
    foreach (array_keys($this->smileyImages) as $code)
    {
        // Array keys that look like integers are converted by PHP; work on a string in any case.
        $code = (string) $code;

        // Collect all codes' first character for later fast evaluation.
        // Square-bracket offsets are used because the curly-brace form no longer parses in PHP 8.
        $first = $code[0];
        if (strpos($this->smileyStarts, $first) === false)
        {
            $this->smileyStarts .= $first;
            // "_" is the delimiter of every pattern these fragments are inserted into.
            $this->smileyStartsRegexpCC .= preg_quote($first, '_');
        }

        if ($this->smileyRegexps !== '')
        {
            $this->smileyRegexps .= '|';
        }
        $this->smileyRegexps .= preg_quote($code, '_');
    }
}
// Changes the path that is stored as the location of the smiley images.
//
// path = (string) New smiley image path; a trailing "/" is appended if it is missing
//
// Throws InvalidArgumentException if path is not a string.
//
public function SetSmileyPath($path)
{
    if (is_string($path) === false)
    {
        throw new InvalidArgumentException('Invalid argument type: $path');
    }

    $lastChar = substr($path, -1);
    $this->smileyPath = ($lastChar != '/') ? $path . '/' : $path;
}
// Stores the offset that ProcessLine() adds to the level of every caption.
//
// headerLevelOffset = (int) Offset, from 0 to 5
//
// Throws InvalidArgumentException if the argument is not an integer or lies outside 0..5.
//
public function SetHeaderLevelOffset($headerLevelOffset)
{
    if (is_int($headerLevelOffset) === false)
    {
        throw new InvalidArgumentException('Invalid argument type: $headerLevelOffset');
    }

    $withinRange = ($headerLevelOffset >= 0 && $headerLevelOffset <= 5);
    if (!$withinRange)
    {
        throw new InvalidArgumentException('Argument out of range: $headerLevelOffset');
    }

    $this->headerLevelOffset = $headerLevelOffset;
}
// Puts all per-conversion working variables back to their initial values:
// empty output buffer, no styles, no list, no quotation and no table.
//
private function ResetState()
{
    // Output buffer
    $this->dst = '';

    // Inline/block style tracking
    $this->actualStyle = $this->desiredStyle = $this->styleStack = array();

    // List and quotation nesting
    $this->listType = '';
    $this->quoteLevel = 0;

    // Table tracking
    $this->tableLine = false;
    $this->tableColumns = $this->maxTableColumns = 0;
}
// Converts a plain input into HTML output.
//
// The input is split at "\n" and handed to ProcessLine() line by line, with trailing
// whitespace (including a "\r" left over from "\r\n" line endings) removed from each line.
//
// src = (string) Plain input to convert
//
// Returns (string) generated HTML
//
public function Convert($src)
{
    // Process source code line by line
    $this->ResetState();
    foreach (explode("\n", $src) as $line)
    {
        $this->ProcessLine(rtrim($line));
    }

    // Make sure that all opened HTML tags are closed again.
    // Lists, tables and block quotes are otherwise only closed when a later line is
    // processed, so an input that ends right after such a line would leave them open.
    // These are the same calls ProcessLine() makes for an empty line; repeating them after
    // an input that already ended with an empty line has no further effect.
    $this->UpdateList('');
    $this->UpdateTable(false);
    $this->UpdateBlockQuote(0);
    $this->ClearStyle(true);
    $this->ApplyStyle();

    $dst = $this->dst;
    $this->ResetState(); // Clean up and save memory
    return $dst;
}
// Post-processes a converted HTML string.
//
// Post-processing can be necessary to resolve generated entities to localised strings or
// other dynamic data. The process of conversion has been split in two phases so that the
// converted output from phase 1 (UnbMarkup::Convert() method) can be cached in a database
// for improved page generation performance.
//
// src = (string) HTML as returned by Convert()
//
// Returns (string) HTML ready for output
//
public function PostProcess($src)
{
    // This is the place to replace session-dependent parts of the pre-translated HTML code
    // like text in a language specified by the session user.
    //
    // NOTE(review): the first search string of both replacements is empty in this copy of
    // the file, which makes str_replace() skip it, and the second one is a plain space.
    // Presumably both were placeholder entities that got lost; confirm against the
    // canonical source before relying on this method.
    if (self::USE_UTF8)
    {
        // First placeholder: replace by Unicode U+200B (ZERO WIDTH SPACE).
        // Its UTF-8 encoding is E2 80 8B; E2 80 8C would be U+200C (ZERO WIDTH NON-JOINER).
        // Second placeholder: replace by Unicode U+00A0 (NO-BREAK SPACE)
        $src = str_replace(
            array('', ' '),
            array("\xE2\x80\x8B", "\xC2\xA0"),
            $src);
    }
    else
    {
        // First placeholder: throw away since it can only be represented with Unicode
        // Second placeholder: replace by the single byte A0h (NO-BREAK SPACE)
        $src = str_replace(
            array('', ' '),
            array('', "\xA0"),
            $src);
    }
    return $src;
}
// Processes a complete line from the input.
//
// The line is classified by its first character and a confirming regular expression as
// caption, indented text, block quote, separator, list item, table row, empty line or plain
// paragraph text. The block structures (list, table, block quote, paragraph) are updated
// accordingly and the line content is passed on to ProcessInline().
//
// NOTE(review): several string literals appended to $this->dst in this method are empty or
// contain only whitespace in this copy of the file; presumably markup was lost. They are
// reproduced unchanged here.
//
// line = (string) Line content
// inQuote = (bool) Value indicating whether the line is in a quote block
//
private function ProcessLine($line, $inQuote = false)
{
    $line = (string) $line;

    // First character of the line, or '' for an empty line. The empty-line check is the last
    // one in the chain below, so every earlier check would otherwise read offset 0 of an
    // empty string. Square brackets are used because the curly-brace offset syntax no longer
    // parses in PHP 8.
    $first = ($line !== '') ? $line[0] : '';

    // Block state on entry. Nothing changes it before one of the branches below is entered.
    // empty() is used because the style arrays only contain the keys that were set.
    $inPar = !empty($this->desiredStyle[self::STYLE_PAR]);
    $inRaw = !empty($this->desiredStyle[self::STYLE_RAW]);

    // Check captions (not in a paragraph or raw mode)
    if ($first === '=' &&
        preg_match('_^=(=+) (.*) =\1$_', $line, $m) &&
        !$inPar &&
        !$inRaw)
    {
        $level = strlen($m[1]) + $this->headerLevelOffset;
        $caption = trim($m[2]);
        $level = min(6, $level); // Limit the level to a maximum of 6
        $this->UpdateList('');
        $this->UpdateTable(false);
        if (!$inQuote) $this->UpdateBlockQuote(0);
        $this->dst .= '';
        $this->ProcessInline($caption, true);
        $this->ClearStyle();
        $this->ApplyStyle();
        $this->dst .= '' . "\n";
    }
    // Check line beginning with spaces (not in a paragraph or raw mode)
    else if (($first === "\t" || $first === ' ') &&
        preg_match('_^(?:(\t+)|( +))_', $line, $m) &&
        !$inPar &&
        !$inRaw)
    {
        $level = strlen($m[1]); // One tab is one level
        if (!$level)
            $level = intval(round(strlen($m[2]) / 4.0)); // Four spaces is one level, rounded
        if ($this->listType != '')
        {
            $this->dst .= '
' . "\n";
        }
        else
        {
            $this->UpdateTable(false);
            if (!$inQuote) $this->UpdateBlockQuote(0);
            $this->desiredStyle[self::STYLE_PAR] = $level;
            $this->ApplyStyle();
        }
        $this->ProcessInline(ltrim($line));
    }
    // Check line beginning with closing angles (blockquotes) (not in raw mode or when re-processing from a quotation)
    else if ($first === '>' &&
        preg_match('_^(>(?: ?>)*)(| (.*))$_', $line, $m) &&
        !$inRaw &&
        !$inQuote)
    {
        $level = strlen(str_replace(' ', '', $m[1]));
        // The content group does not take part in the match when the line consists of quote
        // markers only; preg_match() then leaves it out of $m.
        $content = isset($m[3]) ? $m[3] : '';
        if ($level != $this->quoteLevel)
        {
            // Quote level has changed, clean up structure.
            if (!empty($this->desiredStyle[self::STYLE_PAR]))
            {
                // New blockquotes must not be contained in a paragraph.
                $this->desiredStyle[self::STYLE_PAR] = false;
                $this->ApplyStyle();
            }
            $this->UpdateList('');
            $this->UpdateTable(false);
        }
        $this->UpdateBlockQuote($level);
        $this->ProcessLine($content, true); // Process the remainder of the line like a line of itself
    }
    // Check separator line (not in raw mode)
    else if ($first === '-' &&
        preg_match('_^----+$_', $line, $m) &&
        !$inRaw)
    {
        $this->UpdateList('');
        if (!$inQuote) $this->UpdateBlockQuote(0);
        $this->ClearStyle();
        $this->ApplyStyle();
        if ($this->tableLine && $this->maxTableColumns > 0)
        {
            $this->dst .= '
' . "\n";
            $this->dst .= ' | ' . "\n";
            $this->dst .= '
' . "\n";
        }
        else
        {
            $this->dst .= '' . "\n";
        }
    }
    // Check list item (not in raw mode)
    else if (($first === '#' || $first === '*') &&
        preg_match('_^([#*][#* \t]*[ \t])(.*)$_', $line, $m) &&
        !$inRaw)
    {
        $type = str_replace(array("\t", ' '), array('', ''), $m[1]);
        $content = $m[2];
        $this->UpdateTable(false);
        if (!$inQuote) $this->UpdateBlockQuote(0);
        if (!empty($this->desiredStyle[self::STYLE_PAR]))
        {
            // Lists must not be contained in a paragraph.
            $this->desiredStyle[self::STYLE_PAR] = false;
            $this->ApplyStyle();
        }
        $this->UpdateList($type);
        $this->ProcessInline($content);
    }
    // Check tables (not in raw mode)
    else if ($first === '|' &&
        preg_match('_^\|(.*)\|$_', $line, $m) &&
        !$inRaw)
    {
        $content = $m[1];
        $this->UpdateTable(true, $inQuote);
        $this->tableColumns = 1; // Initialise value
        $this->dst .= '' . "\n";
        $this->dst .= '';
        $this->ProcessInline($content);
        $this->dst .= ' | ' . "\n";
        $this->dst .= '
' . "\n";
        if ($this->tableColumns > $this->maxTableColumns)
        {
            $this->maxTableColumns = $this->tableColumns;
        }
        $this->tableColumns = 0; // Clean up value
    }
    // Check empty lines (not in raw mode)
    else if ($line === '' &&
        !$inRaw)
    {
        // Close everything, especially a paragraph.
        // Multiple subsequent empty lines don't have any effect.
        $this->UpdateList('');
        $this->UpdateTable(false);
        if (!$inQuote) $this->UpdateBlockQuote(0);
        $this->ClearStyle();
        $this->ApplyStyle();
    }
    else
    {
        $this->UpdateList('');
        $this->UpdateTable(false);
        if (!$inQuote) $this->UpdateBlockQuote(0);
        if (empty($this->desiredStyle[self::STYLE_PAR]))
        {
            // Every text must be contained in a paragraph.
            $this->desiredStyle[self::STYLE_PAR] = true;
        }
        $this->ProcessInline($line);
        if (empty($this->actualStyle[self::STYLE_PAR]))
        {
            // There was no visible content in the line that would have triggered the
            // paragraph style. Reset it so that this line appears empty.
            $this->desiredStyle[self::STYLE_PAR] = false;
        }
        if (!empty($this->desiredStyle[self::STYLE_RAW]))
        {
            // In raw mode, line endings are preserved.
            $this->dst .= '
';
        }
        $this->dst .= "\n";
    }
}
// Processes inline input.
//
// Walks through the input character by character. Outside raw mode, plain text is copied
// (HTML-escaped) up to the next character that can start a control code; the control code
// at that position, if any, updates $this->desiredStyle or writes output directly.
// In raw mode only the end-of-raw marker is recognised.
//
// NOTE(review): many patterns and output literals in this method are empty (e.g. the group
// in '_\G()_i') in this copy of the file; presumably the tag names and markup were lost.
// They are reproduced unchanged here and must be checked against the canonical source.
//
// line = (string) Input
// onlyBasic = (bool) Only process the very basic formatting, ignore other formatting markup
//
private function ProcessInline($line, $onlyBasic = false)
{
    // Style flags are tested with empty() throughout because the style arrays only contain
    // the keys that were set. String offsets use square brackets because the curly-brace
    // syntax no longer parses in PHP 8.
    for ($i = 0; $i < strlen($line); $i++)
    {
        if (empty($this->desiredStyle[self::STYLE_RAW]))
        {
            // If not in raw mode, we can skip to the next interesting character in one step.
            // This will jump over areas of the source text that are plain text and contain
            // no control codes. This saves about 90% of the execution time for processing
            // source texts with something like average control code ratio.
            // NOTE: Update this RegExp to add new control characters that are handled at other
            // places.
            if (preg_match("_['{}\[|<" . $this->smileyStartsRegexpCC . "0-9]|(?i)https?:|ftp:|www\.|mailto:_", $line, $m, PREG_OFFSET_CAPTURE, $i))
            {
                $nextIndex = $m[0][1];
                if ($nextIndex != $i)
                {
                    // Only process the text until nextIndex if we're not already right there.
                    $this->ApplyStyle();
                    $this->dst .= htmlspecialchars(substr($line, $i, $nextIndex - $i));
                    $i = $nextIndex;
                }
            }
            else
            {
                // No interesting character found, skip to the end of the line.
                $this->ApplyStyle();
                $this->dst .= htmlspecialchars(substr($line, $i));
                break;
            }
        }
        $unprocessed = false;
        if (empty($this->desiredStyle[self::STYLE_RAW]))
        {
            // Following control codes are *not* valid in raw mode.
            // Check emphasising
            if ($line[$i] === "'" &&
                preg_match("_\\G'('+)_", $line, $m, 0, $i))
            {
                $level = strlen($m[1]);
                $level = min(3, $level); // Limit the level to a maximum of 3
                if ($level & 1)
                    $this->desiredStyle[self::STYLE_EM] = empty($this->desiredStyle[self::STYLE_EM]);
                if ($level & 2)
                    $this->desiredStyle[self::STYLE_STRONG] = empty($this->desiredStyle[self::STYLE_STRONG]);
                $i += strlen($m[1]);
            }
            // Check raw mode (not in basic mode)
            else if (!$onlyBasic &&
                $line[$i] === '{' &&
                preg_match('_\G{{{_', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_RAW] = true;
                $i += 2;
            }
            // Check monospace (not in monospace mode)
            else if ($line[$i] === '{' &&
                empty($this->desiredStyle[self::STYLE_MONO]) &&
                preg_match('_\G{{_', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_MONO] = true;
                $i += 1;
            }
            // Check end of monospace (only in monospace mode)
            else if ($line[$i] === '}' &&
                !empty($this->desiredStyle[self::STYLE_MONO]) &&
                preg_match('_\G}}_', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_MONO] = false;
                $i += 1;
            }
            // Check anchor definition
            else if ($line[$i] === '[' &&
                preg_match('_\G\[\[=(\S+?)\]\]_i', $line, $m, 0, $i))
            {
                $anchor = trim($m[1]);
                $this->dst .= '';
                $i += strlen($m[1]) + 5 - 1;
            }
            // Check hyperlink/image (not in basic mode)
            else if (!$onlyBasic &&
                $line[$i] === '[' &&
                preg_match('_\G\[\[(.*?)\]\]([0-9a-z]*)_i', $line, $m, 0, $i))
            {
                $wordExtra = $m[2];
                // Check link caption
                $parts = explode(' ', trim($m[1]), 2);
                $link = $parts[0];
                // A link without a caption yields a single part only.
                $caption = isset($parts[1]) ? trim($parts[1]) : '';
                $this->ApplyStyle();
                if (preg_match('_^(https?|ftp)://[-.0-9a-z]+/[^?]+\.(bmp|gif|ico|jp2|jpg|jpeg|png|svg)$_i', urldecode($link)))
                {
                    // This is an image
                    if (!strlen($caption))
                        $caption = $link;
                    $this->dst .= '';
                    $i += strlen($m[1]) + 4 - 1;
                }
                else
                {
                    // ResolveLink() hands back link, caption, a flag whether the word
                    // characters following the link belong to the caption, and a flag
                    // whether the caption is to be written without further processing.
                    $resLink = $this->ResolveLink($link, $caption);
                    if ($resLink)
                    {
                        $link = $resLink[0];
                        $caption = $resLink[1];
                        $useWordExtra = $resLink[2];
                        $rawCaption = $resLink[3];
                        if ($useWordExtra && $wordExtra != '')
                        {
                            $caption .= $wordExtra;
                        }
                        else
                        {
                            $wordExtra = '';
                        }
                        $this->dst .= '';
                        if ($rawCaption)
                        {
                            // Escaping the caption with htmlspecialchars() at this point
                            // would not support inline formatting in a link caption.
                            $this->dst .= $caption;
                        }
                        else
                        {
                            // Backup and reset styles
                            $backupActualStyle = $this->actualStyle;
                            $backupDesiredStyle = $this->desiredStyle;
                            $backupStyleStack = $this->styleStack;
                            $this->actualStyle = array();
                            $this->desiredStyle = array();
                            $this->styleStack = array();
                            $this->ProcessInline($caption, true);
                            $this->ClearStyle();
                            $this->ApplyStyle();
                            // Restore styles
                            $this->actualStyle = $backupActualStyle;
                            $this->desiredStyle = $backupDesiredStyle;
                            $this->styleStack = $backupStyleStack;
                        }
                        $this->dst .= '';
                        $i += strlen($m[1]) + 4 - 1 + strlen($wordExtra);
                    }
                    else
                    {
                        // We were signalled that the link is invalid and should not be made clickable.
                        $this->dst .= '['; // Don't drop the current character when we don't process it
                    }
                }
            }
            // Check table column delimiter (only in table mode, not in basic mode)
            else if (!$onlyBasic &&
                $line[$i] === '|' &&
                $this->tableLine)
            {
                $this->ClearStyle();
                $this->ApplyStyle();
                $this->tableColumns++;
                $this->ApplyStyle();
                $this->dst .= '' . "\n" . '';
            }
            // Check left-aligned floating box (not in left/right-align or basic mode)
            else if (!$onlyBasic &&
                $line[$i] === '<' &&
                empty($this->desiredStyle[self::STYLE_LEFT]) &&
                empty($this->desiredStyle[self::STYLE_RIGHT]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_LEFT] = true;
                $i += strlen($m[1]) - 1;
            }
            // Check end of left-aligned floating box (only in left-align mode)
            else if ($line[$i] === '<' &&
                !empty($this->desiredStyle[self::STYLE_LEFT]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->ClearStyle();
                $this->desiredStyle[self::STYLE_LEFT] = false;
                $i += strlen($m[1]) - 1;
            }
            // Check right-aligned floating box (not in left/right-align or basic mode)
            else if (!$onlyBasic &&
                $line[$i] === '<' &&
                empty($this->desiredStyle[self::STYLE_LEFT]) &&
                empty($this->desiredStyle[self::STYLE_RIGHT]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_RIGHT] = true;
                $i += strlen($m[1]) - 1;
            }
            // Check end of right-aligned floating box (only in right-align mode)
            else if ($line[$i] === '<' &&
                !empty($this->desiredStyle[self::STYLE_RIGHT]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->ClearStyle();
                $this->desiredStyle[self::STYLE_RIGHT] = false;
                $i += strlen($m[1]) - 1;
            }
            // Check centre-aligned box (not in centre-align or basic mode)
            else if (!$onlyBasic &&
                $line[$i] === '<' &&
                empty($this->desiredStyle[self::STYLE_CENTRE]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_CENTRE] = true;
                $i += strlen($m[1]) - 1;
            }
            // Check end of centre-aligned floating box (only in centre-align mode)
            else if ($line[$i] === '<' &&
                !empty($this->desiredStyle[self::STYLE_CENTRE]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->ClearStyle();
                $this->desiredStyle[self::STYLE_CENTRE] = false;
                $i += strlen($m[1]) - 1;
            }
            // Check bordered box (not in box or basic mode)
            else if (!$onlyBasic &&
                $line[$i] === '<' &&
                empty($this->desiredStyle[self::STYLE_BOX]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_BOX] = true;
                $i += strlen($m[1]) - 1;
            }
            // Check end of bordered box (only in box mode)
            else if ($line[$i] === '<' &&
                !empty($this->desiredStyle[self::STYLE_BOX]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->ClearStyle();
                $this->desiredStyle[self::STYLE_BOX] = false;
                $i += strlen($m[1]) - 1;
            }
            // Check off topic (not in off-topic or basic mode)
            else if (!$onlyBasic &&
                $line[$i] === '<' &&
                empty($this->desiredStyle[self::STYLE_OT]) &&
                preg_match('_\G(<(?:ot|offtopic)>)_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_OT] = true;
                $i += strlen($m[1]) - 1;
            }
            // Check end of off topic (only in off-topic mode)
            else if ($line[$i] === '<' &&
                !empty($this->desiredStyle[self::STYLE_OT]) &&
                preg_match('_\G((?:ot|offtopic)>)_i', $line, $m, 0, $i))
            {
                $this->ClearStyle();
                $this->desiredStyle[self::STYLE_OT] = false;
                $i += strlen($m[1]) - 1;
            }
            // Check text colour (not in colour or basic mode)
            else if (!$onlyBasic &&
                $line[$i] === '<' &&
                empty($this->desiredStyle[self::STYLE_COLOUR]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $colour = $m[2];
                $this->desiredStyle[self::STYLE_COLOUR] = $colour;
                $i += strlen($m[1]) - 1;
            }
            // Check end of text colour (only in colour mode)
            else if ($line[$i] === '<' &&
                !empty($this->desiredStyle[self::STYLE_COLOUR]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_COLOUR] = false;
                $i += strlen($m[1]) - 1;
            }
            // Check background colour (not in back-colour or basic mode)
            else if (!$onlyBasic &&
                $line[$i] === '<' &&
                empty($this->desiredStyle[self::STYLE_BACKC]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $colour = $m[2];
                $this->desiredStyle[self::STYLE_BACKC] = $colour;
                $i += strlen($m[1]) - 1;
            }
            // Check end of background colour (only in back-colour mode)
            else if ($line[$i] === '<' &&
                !empty($this->desiredStyle[self::STYLE_BACKC]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_BACKC] = false;
                $i += strlen($m[1]) - 1;
            }
            // Check text size (not in size or basic mode)
            else if (!$onlyBasic &&
                $line[$i] === '<' &&
                empty($this->desiredStyle[self::STYLE_SIZE]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $size = intval($m[2]) / 10.0;
                $this->desiredStyle[self::STYLE_SIZE] = $size;
                $i += strlen($m[1]) - 1;
            }
            // Check end of text size (only in size mode)
            else if ($line[$i] === '<' &&
                !empty($this->desiredStyle[self::STYLE_SIZE]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_SIZE] = false;
                $i += strlen($m[1]) - 1;
            }
            // Check bigger font (not in big mode)
            else if ($line[$i] === '<' &&
                empty($this->desiredStyle[self::STYLE_BIG]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_BIG] = true;
                $i += strlen($m[1]) - 1;
            }
            // Check end of bigger font (only in big mode)
            else if ($line[$i] === '<' &&
                !empty($this->desiredStyle[self::STYLE_BIG]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_BIG] = false;
                $i += strlen($m[1]) - 1;
            }
            // Check smaller font (not in small mode)
            else if ($line[$i] === '<' &&
                empty($this->desiredStyle[self::STYLE_SMALL]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_SMALL] = true;
                $i += strlen($m[1]) - 1;
            }
            // Check end of smaller font (only in small mode)
            else if ($line[$i] === '<' &&
                !empty($this->desiredStyle[self::STYLE_SMALL]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_SMALL] = false;
                $i += strlen($m[1]) - 1;
            }
            // Check underline (not in underline mode)
            else if ($line[$i] === '<' &&
                empty($this->desiredStyle[self::STYLE_ULINE]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_ULINE] = true;
                $i += strlen($m[1]) - 1;
            }
            // Check end of underline (only in underline mode)
            else if ($line[$i] === '<' &&
                !empty($this->desiredStyle[self::STYLE_ULINE]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_ULINE] = false;
                $i += strlen($m[1]) - 1;
            }
            // Check strike-through (not in strike-through mode)
            else if ($line[$i] === '<' &&
                empty($this->desiredStyle[self::STYLE_STRIKE]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_STRIKE] = true;
                $i += strlen($m[1]) - 1;
            }
            // Check end of strike-through (only in strike-through mode)
            else if ($line[$i] === '<' &&
                !empty($this->desiredStyle[self::STYLE_STRIKE]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_STRIKE] = false;
                $i += strlen($m[1]) - 1;
            }
            // Check subscript (not in subscript or basic mode)
            else if (!$onlyBasic &&
                $line[$i] === '<' &&
                empty($this->desiredStyle[self::STYLE_SUB]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_SUB] = true;
                $i += strlen($m[1]) - 1;
            }
            // Check end of subscript (only in subscript mode)
            else if ($line[$i] === '<' &&
                !empty($this->desiredStyle[self::STYLE_SUB]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_SUB] = false;
                $i += strlen($m[1]) - 1;
            }
            // Check superscript (not in superscript or basic mode)
            else if (!$onlyBasic &&
                $line[$i] === '<' &&
                empty($this->desiredStyle[self::STYLE_SUP]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_SUP] = true;
                $i += strlen($m[1]) - 1;
            }
            // Check end of superscript (only in superscript mode)
            else if ($line[$i] === '<' &&
                !empty($this->desiredStyle[self::STYLE_SUP]) &&
                preg_match('_\G()_i', $line, $m, 0, $i))
            {
                $this->desiredStyle[self::STYLE_SUP] = false;
                $i += strlen($m[1]) - 1;
            }
            // Check manual line break
            else if ($line[$i] === '<' &&
                preg_match('_\G( )_i', $line, $m, 0, $i))
            {
                $this->ApplyStyle();
                $this->dst .= ' ';
                $i += strlen($m[1]) - 1;
            }
            // Check URL (not in basic mode)
            else if (!$onlyBasic &&
                (strpos('hfwHFW', $line[$i]) !== false) &&
                preg_match('_\G(?<=[ \t]|^)
(
( (https?|ftp):// ([-$%+,.0-9=a-z\_~]*(:[-$%+,.0-9=a-z\_~]*)?@)? | www\. )
[0-9a-z]([-.0-9a-z]*[0-9a-z])?
(/[-!#$%&()+,./0-9:;=?@a-z\[\]\_~]*)?
)
(?=[ \t!"),.:;?]|$)
_ix', $line, $m, 0, $i))
            {
                // Exclude matched ending characters from the link
                $m[1] = preg_replace('_[!"),.:;?]$_', '', $m[1]);
                $link = $m[1];
                $caption = htmlspecialchars($link, ENT_NOQUOTES);
                // Add after each [/?&] to allow wrapping long URLs (works on HTML code already)
                $caption = preg_replace('_(/(?!/)|\?|&)_', '\1', $caption);
                // Add protocol to the link if missing
                if (!preg_match('_^(https?|ftp):_i', $link))
                {
                    $link = 'http://' . $link;
                }
                $this->ApplyStyle();
                $this->dst .= '' . $caption . '';
                $i += strlen($m[1]) - 1;
            }
            // Check mailto URL (not in basic mode)
            else if (!$onlyBasic &&
                (strpos('mM', $line[$i]) !== false) &&
                preg_match('_\G(?<=[ \t]|^)
(
mailto:
[!#$%&()*+,\-.0-9:;=?a-z\[\]^\_~]+
@
[0-9a-z]([-.0-9a-z]*[0-9a-z])?
(
\?(to|cc|bcc|subject|body)=[!-%\'->@-~]*
(&(to|cc|bcc|subject|body)=[!-%\'->@-~]*)*
)?
)
(?=[ \t!"),.:;?]|$)
_ix', $line, $m, 0, $i))
            {
                // Exclude matched ending characters from the link
                $m[1] = preg_replace('_[!"),.:;?]$_', '', $m[1]);
                $link = $m[1];
                $caption = htmlspecialchars($link, ENT_NOQUOTES);
                // Add after each [.@?&]|%20|%0A to allow wrapping long URLs (works on HTML code already)
                $caption = preg_replace('_(mailto:|\.|@|\?|&|%20|%0A)_i', '\1', $caption);
                $this->ApplyStyle();
                $this->dst .= '' . $caption . '';
                $i += strlen($m[1]) - 1;
            }
            // Check smiley
            else if (($code = $this->CheckSmiley($line, $i)))
            {
                $this->ApplyStyle();
                $this->TranslateSmiley($code);
                $i += strlen($code) - 1;
            }
            // Non-wrapping numbers
            // The pattern is anchored with \G like all others here. Without the anchor it
            // would also match a number further along the line, which would then be written
            // at the current position while $i advanced by the wrong amount.
            else if ((strpos('0123456789', $line[$i]) !== false) &&
                preg_match('_\G([0-9]+([., ][0-9]+)* )[^0-9., ]_', $line, $m, 0, $i))
            {
                $s = htmlspecialchars($m[1], ENT_NOQUOTES);
                // Do not prevent any wrapping if the found number is very long
                if (strlen($m[1]) <= 12)
                {
                    // Replace all spaces by non-breaking spaces (works on HTML code already)
                    $s = str_replace(' ', ' ', $s);
                }
                $this->ApplyStyle();
                $this->dst .= $s;
                $i += strlen($m[1]) - 1;
            }
            else
            {
                $unprocessed = true;
            }
        }
        else // raw mode
        {
            // Following control codes are *only* valid in raw mode.
            // Check end of raw mode (only in raw mode)
            if ($line[$i] == '}' &&
                preg_match('_\G}}}(}*)_', $line, $m, 0, $i))
            {
                $add = strlen($m[1]);
                if ($add == 0)
                {
                    $this->desiredStyle[self::STYLE_RAW] = false;
                    $i += 2;
                }
                else
                {
                    // More than three closing braces: write the braces out with one brace
                    // less and stay in raw mode.
                    $this->ApplyStyle();
                    $this->dst .= '}}}' . str_repeat('}', $add - 1);
                    $i += 2 + $add;
                }
            }
            else
            {
                $unprocessed = true;
            }
        }
        if ($unprocessed)
        {
            // This character was not processed by any of the above conditional blocks.
            $this->ApplyStyle();
            if (!empty($this->desiredStyle[self::STYLE_RAW]) &&
                $line[$i] == "\t")
            {
                $this->dst .= ' ';
            }
            else if (!empty($this->desiredStyle[self::STYLE_RAW]) &&
                $line[$i] == ' ' &&
                ($i == 0 || $line[$i - 1] == ' ' && substr($this->dst, -1) == ' '))
            {
                $this->dst .= ' ';
            }
            else
            {
                $this->dst .= htmlspecialchars($line[$i], ENT_NOQUOTES);
            }
        }
    }
}
// Clear current styles.
//
// This is used at the end of blocks (e.g. paragraphs) to end all active inline formatting.
// Only $this->desiredStyle is changed; the output is written by the next ApplyStyle() call.
//
// all = (bool) Set to {{true}} only at the global end of the input to close all block styles, too.
//
private function ClearStyle($all = false)
{
    if ($all)
    {
        $this->desiredStyle = array();
        return;
    }

    // Some styles should not be cleared
    $keepIds = array(
        self::STYLE_LEFT,
        self::STYLE_RIGHT,
        self::STYLE_CENTRE,
        self::STYLE_BOX,
        self::STYLE_OT,
        self::STYLE_RAW,
    );
    $keepStyle = array();
    foreach ($keepIds as $styleId)
    {
        // A style that was never set has no key in the array; carry it over as null, which
        // is the value a direct read would give, without touching the missing key.
        $keepStyle[$styleId] = isset($this->desiredStyle[$styleId])
            ? $this->desiredStyle[$styleId]
            : null;
    }
    $this->desiredStyle = $keepStyle;
}
// Applies the desired style and writes all necessary HTML output.
//
// This matches $desiredStyle with $actualStyle and does everything necessary so that after
// this method call, the desired style is active in the HTML output.
//
// Styles that were never set have no key in the style arrays. They are read as null here,
// which compares like the "off" value, instead of accessing the missing key.
//
private function ApplyStyle()
{
    // Step 1:
    // Walk through the actual styles' stack bottom-up and find the first item to be closed.
    $closeFromStackPos = -1;
    $stackSize = count($this->styleStack);
    for ($x = 0; $x < $stackSize; $x++)
    {
        $styleId = $this->styleStack[$x];
        $actual = isset($this->actualStyle[$styleId]) ? $this->actualStyle[$styleId] : null;
        $desired = isset($this->desiredStyle[$styleId]) ? $this->desiredStyle[$styleId] : null;
        if ($actual != $desired)
        {
            $closeFromStackPos = $x;
            break;
        }
    }

    // From this one on, every item is closed top-down.
    if ($closeFromStackPos >= 0)
    {
        while (count($this->styleStack) > $closeFromStackPos)
        {
            $styleId = array_pop($this->styleStack);
            $data = isset($this->desiredStyle[$styleId]) ? $this->desiredStyle[$styleId] : null;
            $this->dst .= $this->TranslateStyle($styleId, false, $data);
            $this->actualStyle[$styleId] = false;
        }
    }

    // Step 2:
    // Find every item to be opened (which now includes the ones that were closed for correct HTML tag nesting).
    for ($styleId = 0; $styleId <= self::STYLE_ID_MAX; $styleId++)
    {
        if (!empty($this->desiredStyle[$styleId]) && empty($this->actualStyle[$styleId]))
        {
            $data = $this->desiredStyle[$styleId];
            $this->dst .= $this->TranslateStyle($styleId, true, $data);
            $this->actualStyle[$styleId] = $data;
            array_push($this->styleStack, $styleId);
        }
    }
}
// Translates style IDs into HTML.
//
// Returns the string that ApplyStyle() appends to the output when a style is opened or
// closed. For a style ID that is not listed in the switch, no return statement is reached.
//
// NOTE(review): nearly all returned literals are empty or a single space in this copy of
// the file; presumably the markup they contained was lost. Verify against the canonical
// source before changing this method.
//
// styleId = (int) See UnbMarkup::STYLE_* constants
// openOrClose = (bool) {{true}} to open the style, {{false}} to close it
// data = Optional style-specific data (the value stored in $desiredStyle for this style)
//
// Returns (string) HTML for opening or closing the style
//
private function TranslateStyle($styleId, $openOrClose, $data = null)
{
switch ($styleId)
{
case self::STYLE_LEFT:
return $openOrClose ? '' : ' ';
case self::STYLE_RIGHT:
return $openOrClose ? '' : ' ';
case self::STYLE_CENTRE:
return $openOrClose ? '' : ' ';
case self::STYLE_BOX:
return $openOrClose ? '' : ' ';
case self::STYLE_OT:
// The opening part includes the translated "off topic" label
return $openOrClose ? '' . $this->TranslateText('off topic') . ' ' : ' ';
case self::STYLE_PAR:
if ($openOrClose)
{
// ProcessLine() stores the indentation level as data for indented lines and
// plain true for ordinary paragraphs
if (is_int($data) && $data > 0)
{
// Indentation level
return '';
}
else
{
return ' ';
}
}
else
{
return ' ';
}
case self::STYLE_EM:
return $openOrClose ? '' : '';
case self::STYLE_STRONG:
return $openOrClose ? '' : '';
case self::STYLE_MONO:
return $openOrClose ? '' : '';
case self::STYLE_COLOUR:
if ($openOrClose)
{
if (is_string($data) && $data != '')
{
// Text colour
return '';
}
else
{
// Quite useless... should never happen...
return '';
}
}
else
{
return '';
}
case self::STYLE_BACKC:
if ($openOrClose)
{
if (is_string($data) && $data != '')
{
// Background colour
return '';
}
else
{
// Quite useless... should never happen...
return '';
}
}
else
{
return '';
}
case self::STYLE_SIZE:
if ($openOrClose)
{
// ProcessInline() stores the size as a float (parsed number divided by 10)
if (is_float($data) && $data > 0)
{
// Text size
return '';
}
else
{
// Quite useless... should never happen...
return '';
}
}
else
{
return '';
}
case self::STYLE_BIG:
return $openOrClose ? '' : '';
case self::STYLE_SMALL:
return $openOrClose ? '' : '';
case self::STYLE_ULINE:
return $openOrClose ? '' : '';
case self::STYLE_STRIKE:
return $openOrClose ? '' : '';
case self::STYLE_SUB:
return $openOrClose ? '' : '';
case self::STYLE_SUP:
return $openOrClose ? '' : '';
case self::STYLE_RAW:
return $openOrClose ? '' : ' ';
}
}
// Determines whether there's a smiley code at a given position.
//
// A smiley is only recognised if it is preceded by a space, a tab or the beginning of the
// line, and followed by a space, a tab, one of the characters !"),.:;? or the end of the line.
//
// line = (string) Line to analyse
// i = (int) Position in the line, where to look for a smiley code
//
// Returns (string) smiley code that was found, or {{false}} otherwise
//
private function CheckSmiley($line, $i)
{
    // For efficiency, first check whether the current character can be the beginning
    // of a smiley at all. Only then call the regular expression stuff.
    // The offset uses square brackets; the curly-brace form no longer parses in PHP 8.
    if (strpos($this->smileyStarts, $line[$i]) === false)
    {
        return false;
    }

    if (preg_match('_\G(?<=[ \t]|^)(' . $this->smileyRegexps . ')(?=[ \t!"),.:;?]|$)_', $line, $m, 0, $i))
    {
        return $m[1];
    }

    // We haven't found a smiley here.
    return false;
}
// Translates a smiley code into HTML and appends it to the output.
//
// A code that is not contained in the smiley table is written out as escaped text.
//
// NOTE(review): the literal written for a known smiley is empty in this copy of the file;
// presumably the image markup was lost. It is reproduced unchanged here.
//
// code = (string) Smiley code
//
private function TranslateSmiley($code)
{
    // Look the code up without accessing a missing key for unknown codes.
    $image = isset($this->smileyImages[$code]) ? $this->smileyImages[$code] : null;
    if ($image)
    {
        $this->dst .= '';
    }
    else
    {
        // Should never happen...
        $this->dst .= htmlspecialchars($code, ENT_NOQUOTES);
    }
}
// Applies the new list type.
//
// This works similar to ApplyStyle by closing all open list items to get the new desired list
// type opened for further input. $this->listType is updated to the new type.
//
// NOTE(review): several literals appended to $this->dst are empty or contain only
// whitespace in this copy of the file; presumably markup was lost. They are reproduced
// unchanged, which is also why the "*" and "#" branches are kept separate.
//
// type = (string) New list type (string of the * and # symbols from the beginning of a line)
//
private function UpdateList($type)
{
    // Find the highest-level difference from the list mode of the previous line, i.e. the
    // character index at which both type strings differ first:
    //   -1 if both strings are equal,
    //   otherwise the index of the first differing character, or the length of the shorter
    //   string if that one is a prefix of the other.
    // Offsets use square brackets; the curly-brace form no longer parses in PHP 8.
    $firstDiff = -2;
    $commonLength = min(strlen($this->listType), strlen($type));
    for ($i = 0; $i < $commonLength; $i++)
    {
        if ($this->listType[$i] != $type[$i])
        {
            $firstDiff = $i;
            break;
        }
    }
    if ($firstDiff == -2)
    {
        if (strlen($this->listType) == strlen($type))
            $firstDiff = -1;
        else
            $firstDiff = $i;
    }

    if ($type != '' && $firstDiff == -1)
    {
        // We're in a list and this item has the same mode as the previous line: no change, new list item.
        $this->ClearStyle();
        $this->ApplyStyle();
        $this->dst .= '' . "\n" . '';
    }
    else if ($firstDiff >= 0)
    {
        // There's a new list mode.
        // Step 1:
        // Close all open lists down to the common basis.
        $origLastListType = $this->listType;
        while (strlen($this->listType) > $firstDiff)
        {
            $lastMode = substr($this->listType, -1);
            if ($lastMode == '*')
            {
                $this->ClearStyle();
                $this->ApplyStyle();
                $this->dst .= '' . "\n" . '' . "\n";
            }
            else if ($lastMode == '#')
            {
                $this->ClearStyle();
                $this->ApplyStyle();
                $this->dst .= '' . "\n" . '' . "\n";
            }
            $this->listType = substr($this->listType, 0, -1);
        }
        if (strlen($type) > 0 && strlen($type) < strlen($origLastListType))
        {
            // We're back on a higher level. Don't touch the previous current-level item again
            // when returning from a nested list, so we close list items for one more level.
            $this->ClearStyle();
            $this->ApplyStyle();
            $this->dst .= '' . "\n" . '';
        }
        // Step 2:
        // Open new lists.
        for ($i = $firstDiff; $i < strlen($type); $i++)
        {
            $newMode = $type[$i];
            if ($newMode == '*')
            {
                $this->dst .= '' . "\n" . '- ';
            }
            else if ($newMode == '#')
            {
                $this->dst .= '
' . "\n" . '- ';
            }
            $this->listType .= $newMode;
        }
    }
}
// Applies the new block quote level.
//
// This works similar to ApplyStyle by closing all open block quotes to get the new desired
// quote level opened for further input.
//
// level = (int) New quote level (0 is no quote)
//
private function UpdateBlockQuote($level)
{
    // NOTE(review): the <blockquote> tag literals were reconstructed - the previous
    // literals contained only whitespace, so no quote element was ever emitted.
    // Confirm against the expected output markup.
    if ($level > $this->quoteLevel)
    {
        // The quote level has increased (possibly starting from no quote at all).
        // Open one block quote element per additional level.
        $this->dst .= str_repeat('<blockquote>' . "\n", $level - $this->quoteLevel);
        $this->quoteLevel = $level;
    }
    else if ($level < $this->quoteLevel)
    {
        // The quote level has decreased (possibly down to zero).
        // Reset the style state first, then close one element per removed level.
        $this->ClearStyle();
        $this->ApplyStyle();
        $this->dst .= str_repeat('</blockquote>' . "\n", $this->quoteLevel - $level);
        $this->quoteLevel = $level;
    }
    // Same level as before: nothing to do.
}
// Applies the new table state.
//
// active = (bool) {{true}} to open a table, {{false}} to close it
// inQuote = (bool) {{true}} if in a quote block, {{false}} otherwise
//
private function UpdateTable($active, $inQuote = false)
{
    // NOTE(review): the <table> tag literals were reconstructed - the previous literals
    // were empty/whitespace, so no table element was ever emitted. Confirm the exact
    // tag (e.g. a class attribute) against the expected output markup.
    if ($active && !$this->tableLine)
    {
        // New table started.
        // Unless the table is part of a quote, leave all block quotes first.
        if (!$inQuote) $this->UpdateBlockQuote(0);
        if ($this->desiredStyle[self::STYLE_PAR])
        {
            // Tables must not be contained in a paragraph.
            $this->desiredStyle[self::STYLE_PAR] = false;
            $this->ApplyStyle();
        }
        $this->maxTableColumns = 0;   // Initialise value
        $this->dst .= '<table>' . "\n";
        $this->tableLine = true;
    }
    else if (!$active && $this->tableLine)
    {
        // Now outside of a table.
        $this->dst .= '</table>' . "\n";
        $this->maxTableColumns = 0;   // Clean up value
        $this->tableLine = false;
    }
    // Otherwise the table state has not changed: nothing to do.
}
// Resolves a link specification into a URL.
//
// link = (string) Link from markup
// caption = (string) Caption from markup, if available
//
// Returns (array(link, caption, useWordExtra, rawCaption)) if the link was recognised,
// optionally with a third item that tells whether the link shall be expanded to the entire
// word it was found in and a fourth item that determines whether the caption shall be used
// as uninterpreted raw (HTML) output, like for images.
// {{false}} if the link is invalid and shall be left unchanged in the output.
//
private function ResolveLink($link, $caption)
{
    $hasCaption = strlen($caption) > 0;

    // Check for forced links (like linking to an image without automatically inlining it)
    // NOTE(review): the target after "url:" is passed through unchecked, which includes
    // schemes like "javascript:". Verify that the caller filters or escapes the result.
    if (preg_match('_^url:(.*)$_i', $link, $m))
    {
        return array($m[1], $hasCaption ? $caption : $m[1]);
    }

    // Check for an uploaded file by its name
    if (preg_match('_^file:(.*)$_i', $link, $m))
    {
        if (!$hasCaption) $caption = $this->TranslateText('file') . ' ' . $m[1];
        // Resolve uploaded file name to a clickable URL for the browser
        // TODO: This link is application dependent!
        if (is_numeric($m[1]))
            $link = 'get_data.php?messageid=' . $m[1];
        else
            $link = 'get_data.php?pagename=' . urlencode($m[1]);
        return array($link, $caption);
    }

    // Check for an uploaded image by its name
    if (preg_match('_^image:(.*)$_i', $link, $m))
    {
        // Resolve uploaded image file name to HTML code to display the image
        $name = $m[1];
        $size = 0;
        if (preg_match('_(:([0-9]+))$_i', $name, $m2))
        {
            // A trailing ":<number>" is the maximum display size in pixels
            $name = substr($name, 0, -strlen($m2[1]));   // Cut off parameters from the name
            $size = intval($m2[2]);
        }

        // TODO: These links are application dependent!
        if (is_numeric($name))
        {
            $imageFile = 'get_data.php?messageid=' . $name . '&forimg=1';
            $link = 'view_message.php?messageid=' . $name;
        }
        else
        {
            $imageFile = 'get_data.php?pagename=' . urlencode($name) . '&forimg=1';
            $link = 'view_message.php?pagename=' . urlencode($name);
        }

        // The automatic alternative text names the image itself (not the rewritten URL),
        // in line with the automatic caption of file: links.
        $alt = $hasCaption ? $caption : $this->TranslateText('image') . ' ' . $name;

        // The HTML is returned as a raw caption, so every attribute value is escaped here.
        // NOTE(review): the <img> tag was reconstructed; the title attribute is only set
        // for an explicitly given caption - confirm against the expected output markup.
        $html = '<img src="' . htmlspecialchars($imageFile, ENT_QUOTES) . '" ';
        $html .= 'alt="' . htmlspecialchars($alt, ENT_QUOTES) . '" ';
        if ($hasCaption)
            $html .= 'title="' . htmlspecialchars($caption, ENT_QUOTES) . '" ';
        if ($size > 0)
            $html .= 'style="max-width: ' . $size . 'px; max-height: ' . $size . 'px;" ';
        else
            $html .= 'style="max-width: 100%;" ';
        $html .= '/>';
        return array($link, $html, false, true);
    }

    // Check for a link to Google
    if (preg_match('_^google:(.*)$_i', $link, $m))
    {
        // Underscores in the search term stand for spaces
        $query = str_replace('_', ' ', $m[1]);
        if (!$hasCaption) $caption = $query . ' ' . $this->TranslateText('google web search');
        $link = str_replace('{q}', urlencode($query), $this->TranslateText('google web search link'));
        // Expand to the entire word only if the caption was generated automatically
        return array($link, $caption, !$hasCaption);
    }

    // Check for a link to Wikipedia
    if (preg_match('_^wikipedia:(.*)$_i', $link, $m))
    {
        // Underscores in the article name stand for spaces
        $query = str_replace('_', ' ', $m[1]);
        if (!$hasCaption) $caption = $query . ' ' . $this->TranslateText('wikipedia');
        $link = str_replace('{q}', urlencode($query), $this->TranslateText('wikipedia link'));
        // Expand to the entire word only if the caption was generated automatically
        return array($link, $caption, !$hasCaption);
    }

    // Check for generic link
    if (preg_match('_^(https?|ftp|mailto|xmpp):_i', $link))
    {
        return array($link, $hasCaption ? $caption : $link);
    }

    // Check for namespace
    if (preg_match('_^[-0-9a-z\_]*:_i', $link))
    {
        // We don't currently support other namespaces, so ignore the link
        return false;
    }

    // Everything else is links to other Wiki pages with no protocol/namespace set.
    $useWordExtra = false;
    if (!$hasCaption)
    {
        $caption = $link;
        // Word expansion is not used for anchor links on the same page - TODO: Why?
        // substr() is used instead of a string offset so that an empty link is handled
        // without an "uninitialized string offset" error.
        if (substr($link, 0, 1) != '#')
        {
            $useWordExtra = true;
        }
    }
    // TODO: This link is application dependent!
    $link = 'view_message.php?pagename=' . urlencode($link);
    return array($link, $caption, $useWordExtra);
}
// Gets the text translation for a specified language.
//
// This function calls UnbLocale::Translate if that class is defined. It uses a static internal
// set of words in English language otherwise.
//
// key = (string) Translation text key.
//
// Returns (string) Translated string.
//
private function TranslateText($key)
{
    // Prefer the application's locale class when it is available. A null result from
    // it falls through to the built-in texts below.
    if (class_exists('UnbLocale'))
    {
        $translated = UnbLocale::Translate('markup:' . $key);
        if ($translated !== null)
        {
            return $translated;
        }
    }

    // Built-in fallback texts. Unknown keys yield null.
    $fallback = null;
    switch ($key)
    {
        case 'off topic':
            $fallback = 'Off Topic:';
            break;
        case 'file':
            $fallback = 'File:';
            break;
        case 'image':
            $fallback = 'Image:';
            break;
        case 'google web search':
            $fallback = '(Google Web Search)';
            break;
        case 'google web search link':
            // {q} is the placeholder for the URL-encoded search term
            $fallback = 'http://www.google.de/search?&q={q}';
            break;
        case 'wikipedia':
            $fallback = '(Wikipedia)';
            break;
        case 'wikipedia link':
            // {q} is the placeholder for the URL-encoded article name
            $fallback = 'http://de.wikipedia.org/wiki/Spezial:Suche?go=Artikel&search={q}';
            break;
    }
    return $fallback;
}
// Gets the quoted representation of another content.
//
// str = (string) Text to quote.
//
// Returns (string) Quoted string, for use in replies.
//
public static function GetQuotedText($str)
{
    // Nothing to quote in an empty text: hand it back untouched.
    if (!strlen($str))
    {
        return $str;
    }

    // preg_replace applies the pattern/replacement pairs one after another, each on
    // the result of the previous one.
    $patterns = array(
        '_^>_m',         // Lines that are already quoted get one more level
        '_^$_m',         // Empty lines become an empty quote line
        '_^(?!>|$)_m',   // All remaining (unquoted, non-empty) lines get a quote prefix
    );
    $replacements = array(
        '>>',
        '>',
        '> ',
    );
    return preg_replace($patterns, $replacements, $str);
}
} // class UnbMarkup
?>
|