diff --git a/tw/error/FOO_error.php b/tw/error/FOO_error.php new file mode 100644 index 000000000..3924768ee --- /dev/null +++ b/tw/error/FOO_error.php @@ -0,0 +1,43 @@ +TW_errors($options); + } + + function add($errval, $position, $show_code, $param1=null, $param2=null ) + { + $this->ErrorArray["TW_err_".$this->identifier++] = array($errval, $param1, $param2, $position, $show_code); + if($errval & 0xff00) $this->IsError = 1; + return null; + } +} +?> \ No newline at end of file diff --git a/tw/error/LINK_error.php b/tw/error/LINK_error.php new file mode 100644 index 000000000..580e15fbc --- /dev/null +++ b/tw/error/LINK_error.php @@ -0,0 +1,94 @@ +TW_errors($options); + } + + function add($errval, $position, $show_code, $param1=null, $param2=null ) + { + $err_id = "TW_err_".$this->identifier++; + $this->ErrorArray[$err_id] = array($errval, $param1, $param2, $position, $show_code); + if($errval & 0xff00) $this->IsError = 1; + + // append errors and warnings + if(($errval & 0x0ff0) && $show_code != null ) + return ''.$show_code.''; + + return null; + } + + // You can call this method from your project and create nice list of errors. + // + // $lang - error string pack + // $mask - any combination of following masks: + // 0xf000 - internal, + // 0x0f00 - errors, + // 0x00f0 - warnings, + // 0x000f - comments + // + function create_list (&$lang, $mask = 0x0ff0) + { + $errors = $this->get_by_mask($mask); + + $out = ""; + } + + function get_error_class ($error) + { + if($error&0xf000) return "tw-err-int"; + if($error&0x0f00) return "tw-err-err"; + if($error&0x00f0) return "tw-err-war"; + return "tw-err-com"; + } +} +?> \ No newline at end of file diff --git a/tw/error/__Template_error.php b/tw/error/__Template_error.php new file mode 100644 index 000000000..194f4a272 --- /dev/null +++ b/tw/error/__Template_error.php @@ -0,0 +1,69 @@ +TW_errors($options); + + // TODO: add your own module initializations here + } + + // You must implement your own add method. 
+ + function add($errval, $position, $show_code, $param1=null, $param2=null ) + { + $err_id = "TW_err_".$this->identifier++; + // + // This is basic error implementation. Modify this code only if you really need it. + // + $this->ErrorArray[$err_id] = array($errval, $param1, $param2, $position, $show_code); + if($errval & 0xff00) $this->IsError = 1; + + // TODO: create your output additions here, and return it as string + + return null; + } + + // TODO: + // You can implement ohther methods here. + // For example: special error filters, post-parsing functions for generating + // error lists, etc... + +} +?> \ No newline at end of file diff --git a/tw/filter-setup/paranoya.php b/tw/filter-setup/paranoya.php new file mode 100644 index 000000000..0795c7755 --- /dev/null +++ b/tw/filter-setup/paranoya.php @@ -0,0 +1,59 @@ + array( + "href" => array( TW_RQ_URL ), // value is required url + "name" => array( TW_RQ_LINK ), // value is link (link+href combination must be fixed in base) + "title" => null, + ), + + "hr" => null, // without attributes + "br" => null, + "img" => array( + "width" => array( TW_NUM, 80, 60, 120 ), // 80 - default, number must be in range <60,120> + "height"=> array( TW_NUM, 80, 60, 120 ), + "src" => array( TW_RQ_URL ), + "title" => null, + "border"=> array( TW_RQ_NUM, 0, 0, 0), + ), + + "p" => array( + // null - default value (null = remove attr if value not found in case array) + "class" => array( TW_CASE, null, array("par1","par2","par3") ), + ), + + "b" => "strong", // tag substitution -> + "strong" => null, // new tag must be configured too + "i" => null, + "u" => null, + "div" => array( + "title" => null, + ), + + "span" => array( + "class" => array( TW_CASE, null, array("my-class1","my-class2","my-class3") ), + ), + + "blockquote" => null, + "h1" => null, + "h2" => null, + "h3" => null, + + "table" => null, + "td" => null, + "tr" => null, + "th" => null, + + "ul" => null, + "ol" => null, + "li" => null, + "dl" => null, + "dt" => null, + "dd" 
=> null, + ); +?> \ No newline at end of file diff --git a/tw/lang/TW_base.php b/tw/lang/TW_base.php new file mode 100644 index 000000000..ae84d6e8f --- /dev/null +++ b/tw/lang/TW_base.php @@ -0,0 +1,471 @@ +pt text + * $this->pti text index + */ +class TW_base // TW_lang extends this class +{ + var $error; // error module object + var $output; // output module object + var $out; // output string + + var $content_off; + + var $tags; // relationships between tags + var $config; // current configuration (twParser::strip_tags()) + var $config_tags; // tags in current configuration + var $config_attr; // attributes in current configuration + var $config_req_attr; // required attributes + + + var $TAG; + var $is_attributes; + var $ATTRIBUTES; // index => array("atr","value") + var $ATTR; // array(atr,value) + var $VALUE; + +/******************************************************************************************** + * BASE CONSTRUCTOR + */ + function TW_base() + { + global $tw_tag_relations; + + $this->stack(); + $this->tags = &$tw_tag_relations; + $this->content_off = 0; + } +/* + * BASE "DESTRUCTOR" (parser call this function if end of string is rached) + */ + function base_end() + { + while( $tag = $this->stack_pop() ) + { + $this->out .= $this->output->close( $tag ); + } + return null; + } + + +/******************************************************************************************** + * TAG STACK implementation + */ + var $Tstack; + + function stack() { $this->Tstack[0] = null; } + function stack_push( $tag ) { array_unshift( $this->Tstack, $tag ); } + function stack_pop() { return array_shift( $this->Tstack ); } + function stack_search( $tag ) { return in_array( $tag, $this->Tstack ); } + + function stack_top( $tag = null ) + { + if( $tag ) return $this->Tstack[0] == $tag; + return $this->Tstack[0]; + } + + + + +/******************************************************************************************** + * S T A R T _ T A G _ f i l t e r + * + */ + + 
function START_TAG_filter() + { + $tag = $this->TAG; + if( $tag == null ) + { + //###### syntax error + return; + } + if( in_array($tag,$this->config_tags) ) + { + //enabled tag + + if(is_string($this->config_attr[$tag])) + { + $this->TAG = $tag = $this->config_attr[$tag]; + //tag substitution warning + } + + // ------- perform attribute check ----------- + + if($this->is_attributes) + { + if(!$this->config_attr[$tag]) + { + $this->ATTRIBUTES = null; + // remove attribute warning + } + else + { + foreach( $this->ATTRIBUTES as $key => $attr ) + { + if( ! in_array($attr[0], $this->config_attr[$tag] ) ) + { + $this->ATTRIBUTES[$key][1] = null; + // remove attribute warning + continue; + } + + // --------- perform value check ----------- + + if(($val = $attr[1])==null) continue; + if(($cmda = &$this->config[$tag][$attr[0]])==null) continue; //null - accept all values + switch($cmda[0] & 7) + { + case TW_URL: + // V0.1.2: fixed some fatal bugs, big thanx to fczbkk + // + // TODO: make better url check with parse_url() + + $val = strtolower($val); + if(strpos($val, "http://") === false) + { + if(strpos($val, "ftp://") === false) + if(strpos($val, "email:") === false) + if(strpos($val, "https://") === false) + if(strpos($val, "./") === false) // local relative url + $val = "http://".$val; + } + $this->ATTRIBUTES[$key][1] = $val; + break; + + case TW_LINK: + //TODO: add link separator check here. + // Do not use this attribute in config! 
+ break; + + case TW_NUM: + if( $val >= $cmda[2] && $val <= $cmda[3] ) break; + $this->ATTRIBUTES[$key][1] = $cmda[1]; + break; + + case TW_CASE: + if( !in_array($val, $cmda[2]) ) $this->ATTRIBUTES[$key][1] = $cmda[1]; + break; + } + } + } + } + // check required attributes + if($this->config_req_attr[$tag]) + { + if( $this->is_attributes ) + foreach( $this->config_req_attr[$tag] as $required ) + { + $req_found = 0; + foreach( $this->ATTRIBUTES as $val) + if( $val[0] == $required ) + { + $req_found = 1; + break; + } + if($req_found) continue; + + switch( $this->config[$tag][$required][0] & 7 ) + { + case TW_LINK: + case TW_URL: + // error + break; + + default: + $this->ATTRIBUTES[$required] = array($required, $this->config[$tag][$required][1]); + break; + } + } + else + { + foreach( $this->config_req_attr[$tag] as $required ) + { + switch( $this->config[$tag][$required][0] & 7 ) + { + case TW_LINK: + case TW_URL: + + // required tag error + + break; + + default: + $this->ATTRIBUTES[$required] = array($required, $this->config[$tag][$required][1]); + break; + } + } + } + + } + // cross tag removal algorithm + + $flag = $this->tags[$tag]; + $top = $this->stack_top(); + + if( $flag[2] != null ) // check if tag before is specified + { + // yes, tag before is specified, check relationship + if(! in_array($top, $flag[2]) ) + { + if( $flag[0] & TW_OPT ) + { + if( $top == $tag ) + { + // End Tag is optional and current tag is the same as last tag (on stack). + // Close previos for XHTML compatibility and open new the same tag. + // Return, because no manipulation with stack is required. 
+ $this->out .= $this->output->close( $tag ); + $this->out .= $this->output->pair( $tag, $this->ATTRIBUTES ); + return; + } + if( $this->stack_search($tag) ) + { + // repair stack + while(($top = $this->stack_pop() ) != $tag ) + { + /*if( !($this->tags[$top][0] & TW_OPT ) ) + { + // auto close warning + }*/ + $this->out .= $this->output->close( $top ); + } + $this->stack_push($tag); + $this->out .= $this->output->close( $tag ); + $this->out .= $this->output->pair( $tag, $this->ATTRIBUTES ); + return; + } + } + // ... =>> ...... ( & they have common parent tag () ) + if( $this->tags[$top][0] & TW_OPT ) + { + if( $this->tags[$top][2] == $flag[2] ) + { + $this->out .= $this->output->close( $this->stack_pop() ); + $this->out .= $this->output->pair( $tag, $this->ATTRIBUTES ); + $this->stack_push( $tag ); + return; + } + } + // invalid relation between tags ####### + return; + } + // valid relationship + } + if( $flag[0] & TW_NOP ) + { + //tag without End Tag
+ $this->out .= $this->output->single( $tag, $this->ATTRIBUTES ); + } + else + { + //Tag with End Tag, push to stack + if( ($top == $tag) && ($flag[0] & TW_OPT) ) + { + // '

' => '

' + $this->out .= $this->output->close( $tag ); + $this->out .= $this->output->pair( $tag, $this->ATTRIBUTES ); + return; + } + $this->out .= $this->output->pair( $tag, $this->ATTRIBUTES ); + $this->stack_push( $tag ); + } + return; + } + else + { + //disabled tag (or not supported) ###### + return; + } + } + +/******************************************************************************************** + * E N D _ T A G _ f i l t e r + * + */ + function END_TAG_filter() + { + $tag = $this->TAG; + if( $tag == null ) + { + // + if ($tag = $this->stack_pop() ) + { + $this->out .= $this->output->close($tag); + return; + } + else + { + // kunda underflow :) + return; + } + } + if(in_array($tag,$this->config_tags)) + { + // enabled tag + if(is_string($this->config_attr[$tag])) + { + $this->TAG = $tag = $this->config_attr[$tag]; + //tag substitution warning + } + + $top = $this->stack_top( $tag ); + if( $top ) + { + $this->out .= $this->output->close( $tag ); + $this->stack_pop(); + return; + } + + if( $this->stack_search($tag) ) + { + // closing cross tags + while( ($top = $this->stack_pop()) != $tag ) + { + /*if( !($this->tags[$top][0] & TW_OPT ) ) + { + // auto close warning + }*/ + $this->out .= $this->output->close( $top ); + } + $this->out .= $this->output->close( $tag ); + } + else + { + // ###### cross tag error + return; + } + } + // drop out warning + } + + +/* + * THERE ARE STATE IN, OUT & NEW FUNCTIONS + * + */ + +/******************************************************************************************** + * STATE T_begin '<' + */ + function T_begin_in() + { + $this->tag_position = $this->pti; + $this->TAG = null; + } + + // --- MAIN TAG-FILTER FUNCTION --- + + function T_begin_out($word) + { + $this->START_TAG_filter(); + } + +/******************************************************************************************** + * STATE TC_begin 'tag_position = $this->pti; + $this->TAG = null; + } + + // TAG CLOSE function + // + function T_Cbegin_out($word) + { + 
$this->END_TAG_filter(); + } + +/******************************************************************************************** + * STATE T_gettag 'tagname' + */ + function T_gettag_in() + { + $this->is_attributes = 0; + $this->ATTRIBUTES = null; + return; + } + + function T_gettag_new($word) { $this->TAG = strtolower($word); } + function T_gettag_out($word) { return; } + +/******************************************************************************************** + * STATE A_begin '__atr...' + */ + function A_begin_in() { $this->ATTR = null; } + function A_begin_new($word) { $this->ATTR[0] = strtolower($word); } + function A_begin_out($word) + { + if( $this->is_attributes ) + { + foreach( $this->ATTRIBUTES as $akey => $aval ) + { + if($this->ATTR[0] == $aval[0]) + { + // duplicate warning + $this->ATTRIBUTES[$akey] = $aval; + return; + } + } + } + else + { + $this->is_attributes = 1; + } + $this->ATTRIBUTES[] = $this->ATTR; + } + +/******************************************************************************************** + * STATE V_begin 'atr...' 
+ */ + function V_begin_in() { $this->VALUE = null; } + function V_begin_out($word) { $this->ATTR[1] = $this->VALUE; } + +/******************************************************************************************** + * STATE VALUE1 + */ + function VALUE1_in() { return; } + function VALUE1_out($word) { $this->VALUE = substr($word,0,strlen($word)-1); } + +/******************************************************************************************** + * STATE VALUE2 + */ + function VALUE2_in() { return; } + function VALUE2_out($word) + { + $this->VALUE = str_replace("\"", """, substr($word, 0, strlen($word)-1) ); + } +/******************************************************************************************** + * STATE VALUE3 + */ + function VALUE3_in() { return; } + function VALUE3_out($word) { $this->VALUE = $word; } + + +} // END class TW_base +?> \ No newline at end of file diff --git a/tw/lang/TW_lang.php b/tw/lang/TW_lang.php new file mode 100644 index 000000000..2c962213a --- /dev/null +++ b/tw/lang/TW_lang.php @@ -0,0 +1,183 @@ +signature = "TW"; + $this->version = "1.0"; + $this->initial_state="OUT"; + $this->states = + array + ( + "OUT" => array ( + array( + "<" => array("T_tagWall",0), + + //"&" => array("VChar",0), // validate char (currently not implemented in base) + ), + + PF_CLEAN, // PF_CLEAN - znaky sa forwarduju na vystup + 0,0 + ), + + "T_tagWall" => array ( + array( + "ALPHA" => array("T_begin",1), // normal tag + "/" => array("T_Cbegin",0), // close tag + "<" => array("T_tagWall",1), // '<<<<<<<<' fix (faster than _RET) + "!--" => array("HTML_comment",0), + "_ALL" => array("_RET",0), // ' array ( + array( + "ALPHA" => array("T_gettag",1), + "_ALL" => array("_RET",0), + ), + PF_XIO | PF_XDONE, 0,0 + ), + + + "T_begin" => array ( + array( + "ALPHA" => array("T_gettag",1), + "_ALL" => array("_RET",0), + ), + + PF_XIO | PF_XDONE, 0,0 + ), + + + "T_gettag" => array ( + array( + "!ALNUM"=> array("T_in",1), + ), + + PF_XIO | PF_XDONE | PF_XNEW, 0,0 + 
), + + + "T_in" => array( + array( + "ALPHA" => array("A_begin",1), // char back to stream + ">" => array("_RET",0), + "/>" => array("_RET",0), // pozor na spracovanie v T_begin + ), + + 0,0,0 + ), + + "A_begin" => array ( + array( + "!ALPHA"=> array("V_begin",1), + ">" => array("_RET",1), // vracia string do streamu + "/>" => array("_RET",1), // pozor na spracovanie v TAGbegin + ), + + PF_XIO | PF_XDONE | PF_XNEW , 0,0 + ), + + // this is wide attribute=value implementation + "V_begin" => array ( + array( + '"' => array("VALUE1",0), + "'" => array("VALUE2",0), + "ALNUM" => array("VALUE3",1), + + ">" => array("_RET",1), + "/>" => array("_RET",1), + ), + + PF_XIO | PF_XDONE, 0,0 + ), + + // "DOUBLEQUOTED VALUE" + "VALUE1" => array( + array( + '"' => array("_RET",0), + ), + PF_XIO, 0,0 + ), + + // 'SINGLEQUOTED VALUE' + "VALUE2" => array( + array( + "'" => array("_RET",0), + ), + PF_XIO, 0,0 + ), + + // UNQUOTEDVALUE99 + "VALUE3" => array( + array( +/* "_" => array ("VALUE3",0), //Uncomment for better HTML4 compatibility (not recommended) + "." 
=> array ("VALUE3",0), +*/ + "-" => array ("VALUE3",0), + "!ALNUM"=> array("_RET",1), + ), + PF_XIO, 0,0 + ), + + // all comment content will be removed + "HTML_comment" => array( + array( + "-->" => array("_RET",0), + ), + 0,0,0 + ), + ); + + $this->keywords=null; + } +} +?> \ No newline at end of file diff --git a/tw/localization/EN_errors.php b/tw/localization/EN_errors.php new file mode 100644 index 000000000..7e7a0a81b --- /dev/null +++ b/tw/localization/EN_errors.php @@ -0,0 +1,57 @@ + null, + TWE_VERSION => "Versions: parser V %s, language V%s", + TWE_CREDITS => "tag|wall: code Juraj Durech (hvge@cauldron.sk).", + TWE_NOTE => "%s", + + // warnings + TWE_STACK_UNDERFLOW => "Stack underflow, tag '%s' was dropped out.", + TWE_STACK_NOT_EMPTY => "There are some unclosed tags on stack.", + TWE_UNEXPECTED_EOST => "Unexpected end of stream.", + TWE_UNEXPECTED_QUOTE => "Unexpected quote.", + + // errors + TWE_SYNTAX => "HTML syntax error.", + TWE_TOO_MANY_ATTRS => "Too many attributes in tag '%s'.", + + // internal errors + TWE_FILE_NOT_FOUND => "File '%s' not found.", + TWE_BAD_SIGNATURE => "Language '%s' have bad signature '%s'.", + TWE_LANG_NOT_FOUND => "Language '%s' not found.", + + ); +?> \ No newline at end of file diff --git a/tw/output/HL_DEBUG_output.php b/tw/output/HL_DEBUG_output.php new file mode 100644 index 000000000..4a2521bd4 --- /dev/null +++ b/tw/output/HL_DEBUG_output.php @@ -0,0 +1,74 @@ +highlight( "<$tag>" ); + $attr = null; + foreach ($attributes as $value) + { + if($value[1]) $attr .= ' '.$value[0].'="'.$value[1].'"'; + } + return $this->highlight( "<$tag$attr>" ); + } + + function single ($tag, &$attributes) + { + if($attributes == null) + return $this->highlight( "<$tag />" ); + $attr = null; + foreach ($attributes as $value) + { + if($value[1]) $attr .= $value[0].'="'.$value[1].'" '; + } + return $this->highlight( "<$tag $attr/>" ); + } + + // template for end tags + function close ($tag) + { + return $this->highlight( "" ); + } + + 
function template_end() { return null; } + + + function highlight($string) + { + $string = str_replace("&","&",$string); + return ''.str_replace("<","<",$string).''; + } + +} //END class HTML_output + +?> \ No newline at end of file diff --git a/tw/output/HTML_output.php b/tw/output/HTML_output.php new file mode 100644 index 000000000..3df2fef9d --- /dev/null +++ b/tw/output/HTML_output.php @@ -0,0 +1,67 @@ +"; + $attr = null; + foreach ($attributes as $value) + { + if($value[1]) $attr .= ' '.$value[0].'="'.$value[1].'"'; + } + return "<$tag$attr>"; + } + + function single ($tag, &$attributes) + { + if($attributes == null) + return "<$tag>"; + $attr = null; + foreach ($attributes as $value) + { + if($value[1]) $attr .= ' '.$value[0].'="'.$value[1].'"'; + } + return "<$tag$attr>"; + } + + // template for end tags + function close ($tag) + { + return ""; + } + + function template_end() { return null; } + +} //END class HTML_output + +?> \ No newline at end of file diff --git a/tw/output/XHTML_output.php b/tw/output/XHTML_output.php new file mode 100644 index 000000000..89207b611 --- /dev/null +++ b/tw/output/XHTML_output.php @@ -0,0 +1,67 @@ +"; + $attr = null; + foreach ($attributes as $value) + { + if($value[1]) $attr .= ' '.$value[0].'="'.$value[1].'"'; + } + return "<$tag$attr>"; + } + + function single ($tag, &$attributes) + { + if($attributes == null) + return "<$tag />"; + $attr = null; + foreach ($attributes as $value) + { + if($value[1]) $attr .= $value[0].'="'.$value[1].'" '; + } + return "<$tag $attr/>"; + } + + // template for end tags + function close ($tag) + { + return ""; + } + + function template_end() { return null; } + +} //END class XHTML_output + +?> \ No newline at end of file diff --git a/tw/tw-config.php b/tw/tw-config.php new file mode 100644 index 000000000..63b4fb40e --- /dev/null +++ b/tw/tw-config.php @@ -0,0 +1,117 @@ + \ No newline at end of file diff --git a/tw/tw-errors.php b/tw/tw-errors.php new file mode 100644 index 
000000000..59881a875 --- /dev/null +++ b/tw/tw-errors.php @@ -0,0 +1,118 @@
+...
+
+// errors
+define('TWE_SYNTAX', 0x0100); // null, HTML syntax error (for future strict bases)
+define('TWE_TOO_MANY_ATTRS', 0x0200); // tag_name, too many attrs in tag ..
+
+// internal errors
+define('TWE_FILE_NOT_FOUND', 0x1000); // param1 = file
+define('TWE_BAD_SIGNATURE', 0x2000); // param1 = language, param2 = signature
+define('TWE_LANG_NOT_FOUND', 0x3000); // param1 = language
+
+// indexes to ErrorArray
+define('TWE_ERRNO', 0); // error value
+define('TWE_PARAM1', 1); // parameter 1
+define('TWE_PARAM2', 2); // parameter 2
+define('TWE_POSIT', 3); // position in source
+define('TWE_CODE', 4); // piece of bad code
+
+/* Base class of the error modules.
+ *
+ * It only stores and filters error records; the add() method that creates
+ * the records is implemented by the error modules extending this class.
+ * Each record is an array indexed by the TWE_ERRNO .. TWE_CODE constants
+ * defined above.
+ */
+class TW_errors
+{
+    var $IsError;    // 0 after construction; is_error() returns this value
+    var $ErrorArray; // error id => record; null until something is stored
+    var $identifier; // counter, starts at 0
+
+    /* class constructor
+     *
+     * $options is accepted for the error modules but is not used here.
+     */
+    function TW_errors($options = 0)
+    {
+        $this->IsError = 0;
+        $this->ErrorArray = null;
+        $this->identifier = 0;
+    }
+
+    function is_error() { return $this->IsError; }
+
+    // Returns the raw record array (null when nothing was stored).
+    function get_err_array() { return $this->ErrorArray; }
+
+    // Shortcuts for get_by_mask(), one per nibble of the error value.
+    function get_comments() { return $this->get_by_mask(0x000f); }
+
+    function get_warnings() { return $this->get_by_mask(0x00f0); }
+
+    function get_errors() { return $this->get_by_mask(0x0f00); }
+
+    function get_internal() { return $this->get_by_mask(0xf000); }
+
+    /* Input:
+     *   mask: bit mask tested against the error value of every record
+     *
+     * Output:
+     *   array (error id => record) of the records whose error value shares
+     *   at least one bit with mask, or null when there is none
+     */
+    function get_by_mask($mask)
+    {
+        // ErrorArray stays null until the first record is stored;
+        // iterating over null would only raise a warning.
+        if( !is_array($this->ErrorArray) ) return null;
+
+        $ErrTemp = null;
+        foreach($this->ErrorArray as $key => $value)
+        {
+            if($value[TWE_ERRNO] & $mask) $ErrTemp[$key] = $value;
+        }
+        return $ErrTemp;
+    }
+
+    /* Input:
+     *   id:   error id (key of ErrorArray)
+     *   lang: error_language_array (error value => sprintf() template)
+     *
+     * Output:
+     *   error text or null (unknown id)
+     */
+    function get_error_text ( $id, &$lang )
+    {
+        // The id is a KEY of ErrorArray. in_array() compares against the
+        // stored records (the values), so it could never find a valid id.
+        if( !is_array($this->ErrorArray) || !isset($this->ErrorArray[$id]) )
+            return null;
+
+        $errno = $this->ErrorArray[$id][TWE_ERRNO];
+
+        // The language pack is keyed by error value, so test the key.
+        // Entries mapped to null are treated as untranslated.
+        if( is_array($lang) && isset($lang[$errno]) )
+            return sprintf( $lang[$errno],
+                            $this->ErrorArray[$id][TWE_PARAM1],
+                            $this->ErrorArray[$id][TWE_PARAM2] );
+
+        return sprintf( "Please translate errno 0x%x.",$errno );
+    }
+
+} // END class 
TW_errors +?> \ No newline at end of file diff --git a/tw/tw-tags.php b/tw/tw-tags.php new file mode 100644 index 000000000..6a83be147 --- /dev/null +++ b/tw/tw-tags.php @@ -0,0 +1,97 @@ +...::: => ...::: + * + * TW_NOP - tag without end tag + *


 <br> <hr> <img> etc.. + * + * TW_OPT - tag has an optional end tag ( a missing end tag is closed automatically ) + *
  • line1
  • line2 =>
  • line1
  • line2
  • + * + * CONT.OFF - content off ( not implemented ) + * content-offcontent-on => content-on + * + * REQ.TAG - required tag in stack + *
    • ... this is correct + *

    • ... this is not correct, result is

      ... + */ +$tw_tag_relations = array( + +// TAG FLAG CONT.OFF REQ. TAG + + "a" => array(0, 0, null), + "b" => array(0, 0, null), + "blockquote"=> array(0, 0, null), + "big" => array(0, 0, null), + "br" => array(TW_NOP, 0, null), + "code" => array(0, 0, null), + "dl" => array(0, 0, null), + "dt" => array(TW_OPT, 0, array("dl")), + "dd" => array(TW_OPT, 0, array("dl")), + "div" => array(0, 0, null), + "em" => array(0, 0, null), + "h1" => array(0, 0, null), + "h2" => array(0, 0, null), + "h3" => array(0, 0, null), + "h4" => array(0, 0, null), + "h5" => array(0, 0, null), + "h6" => array(0, 0, null), + "hr" => array(TW_NOP, 0, null), + "i" => array(0, 0, null), + "img" => array(TW_NOP, 0, null), + "ul" => array(0, 1, null), + "ol" => array(0, 1, null), + "li" => array(TW_OPT, 0, array("ul","ol")), + "object" => array(0, 1, null), + "p" => array(TW_OPT, 0, null), + "pre" => array(0, 0, null), + "small" => array(0, 0, null), + "span" => array(0, 0, null), + "strong" => array(0, 0, null), + "style" => array(0, 1, null), + "sub" => array(0, 0, null), + "sup" => array(0, 0, null), + "table" => array(0, 1, null), + "caption" => array(0, 0, array("table")), + "tbody" => array(0, 0, array("table")), + "tfoot" => array(0, 0, array("table")), + "thead" => array(0, 0, array("table")), + "tr" => array(TW_OPT, 1, array("table","tbody")), + "td" => array(TW_OPT, 0, array("tr")), + "th" => array(TW_OPT, 0, array("tr")), + "u" => array(0, 0, null), + + // TODO: add your specific tags here... 
+ + ); +?> \ No newline at end of file diff --git a/tw/tw.php b/tw/tw.php new file mode 100644 index 000000000..c4f813edd --- /dev/null +++ b/tw/tw.php @@ -0,0 +1,291 @@ +lang = new $_lang; + $this->base = $_base; + $this->options = $options; + $this->_trans = &$this->lang->trans; + $this->_flags = &$this->lang->flags; + $this->_delim = &$this->lang->delim; + $this->_ret = &$this->lang->ret; + $this->_quit = &$this->lang->quit; + $this->_names = &$this->lang->names; + + $this->content_off = &$this->lang->content_off; + } + + /* STRIP TAGS + * + * input: + * string $text - input string + * array $configuration - filter configuration array ( see files in directory tw/filter-setup/ ) + * string $output_module - output module name ( tw/output ) + * string $error_module - error module name ( tw/error ) + * int $offset - offset in $text + * + * output: + * parsed string + */ + function strip_tags ( + $text, + &$configuration, + $output_module = "XHTML", + $error_module = "FOO", + $offset = 0 + ) + { + // open modules + $_err = $error_module."_error"; + require_once (TW_ERRMODULE."$_err.php"); + $this->err = new $_err( $this->options ); + + $_out = $output_module."_output"; + require_once (TW_OUTMODULE."$_out.php"); + $this->output = new $_out; + + // parser init + $this->text = &$text; + $this->textlen = strlen($text); + $this->text .= "IMNOTREALLYOPTIMISTIC"; + $this->textpos = $offset; + $this->out = null; + + // FSHL pointers init + $this->lang->pt = &$this->text; + $this->lang->pti = &$this->textpos; + $this->lang->out = &$this->out; + $this->lang->err = &$this->err; + $this->lang->output = &$this->output; + + // base init + $base = &$this->base; + $this->lang->$base(); + $this->lang->config_tags = array_keys($configuration); + // load initial configuration + foreach($configuration as $tag => $attributes) + { + $this->lang->config_req_attr[$tag] = null; + if(is_array($attributes)) + { + $this->lang->config_attr[$tag] = array_keys($configuration[$tag]); + 
foreach($attributes as $attr => $command) + { + if( $command ) + if( $command[0] & TW_REQ ) $this->lang->config_req_attr[$tag][] = $attr; + } + } + else + { + $this->lang->config_attr[$tag] = $attributes; + } + } + $this->lang->config = &$configuration; + + // start parser + $this->parse_string ( $this->lang->initial_state ); + + $this->out .= $this->lang->base_end(); + $this->out .= $this->output->template_end(); + + return $this->out; + } + + function get_position() { return $this->textpos; } + + function get_out() { return $this->out; } + + // error wrapper + + function is_error() { return $this->err->is_error(); } + function get_err_array() { return $this->err->get_err_array(); } + function get_comments() { return $this->err->get_by_mask(0x000f); } + function get_warnings() { return $this->err->get_by_mask(0x00f0); } + function get_errors() { return $this->err->get_by_mask(0x0f00); } + function get_internal() { return $this->err->get_by_mask(0xf000); } + function get_by_mask($mask) { return $this->err->get_by_mask($mask); } + function get_error_text ( $id, &$lang ) { return $this->err->get_error_text ( $id, &$lang ); } + +// --------------------------------------------------------------------------------- +// LOW LEVEL functions +// + +// main parser function +// +function parse_string ($state) +{ + $flags = $this->_flags[$state]; + $statename_n = $this->_names[$state]."_new"; + // perform IN function if required + if( $flags & PF_XIO ) + { + $statename_i = $this->_names[$state]."_in"; + $statename_o = $this->_names[$state]."_out"; + $this->lang->$statename_i(); + } + $stateword = null; + + while( ($word = $this->getword("isd$state")) != null ) + { + if(is_array($word)) + { + // word is delimiter + $newstate = $this->_trans[$state][$word[0]][XL_DSTATE]; + + // char back to stream (CB2S) if required + if( $this->_trans[$state][$word[0]][XL_DTYPE] ) + { + if( $newstate == $state ) + { + // If it is the same state, CB2S flag have different significance + // 
re-initialize state (call IN function) + $stateword = null; + if( $flags & PF_XIO ) $this->lang->$statename_i(); + continue; + } + $this->textpos -= strlen($word[1]); + } + else + { + $stateword .= $word[1]; // add new parsed word to stateword + } + if( $newstate == $this->_ret ) // newstate is _RET from recursion + { + // perform NEW function if required + if( $flags & PF_XNEW ) $this->lang->$statename_n($stateword); + // perform OUT function if required + if( $flags & PF_XIO ) $this->lang->$statename_o($stateword); + // return from recursion + return; + } + + if( $state != $newstate ) // recursion - only if it is really new state + { + // perform NEW function if required + if( $flags & PF_XNEW ) $this->lang->$statename_n($stateword); + // recursion + $this->parse_string($newstate); + // perform OUT function if required and return. + if( $flags & PF_XDONE ) + { + if( $flags & PF_XIO ) $this->lang->$statename_o(null); + return; + } + continue; + } + } + else + { + // word is not delimiter + if( $flags & PF_CLEAN ) + { + if(!$this->content_off) + $this->out .= str_replace("<",">",$word); + } + else + { + $stateword .= $word; + } + } + } //END while() + + // TODO: check this OUT + + // perform NEW function if required + if( $flags & PF_XNEW ) $this->lang->$statename_n($stateword); + // perform OUT function if required and return. + if( $flags & PF_XIO ) $this->lang->$statename_o($stateword); +} + +// get word from stream +// +function getword ($state) +{ + $result = null; + if($this->textpos < $this->textlen) + { + $del = $this->lang->$state(); // call "is delimiter" isdX function + if($del != false) + { + // actual char (or sub-string) is delimiter + $this->textpos += strlen($del[1]); + return $del; + } + else + { + // Actual char/string is not delimiter. 
+ // Result word is between current position and first delimiter in stream + $result = $this->text[$this->textpos++]; + while(($this->textpos < $this->textlen) && !$this->lang->$state()) + $result .= $this->text[$this->textpos++]; + } + } + return $result; +} + +} // END class twParser +?> \ No newline at end of file diff --git a/tw/tw_cache/TW_lang.php b/tw/tw_cache/TW_lang.php new file mode 100644 index 000000000..c510a5a1e --- /dev/null +++ b/tw/tw_cache/TW_lang.php @@ -0,0 +1,177 @@ +version="1.0"; + $this->signature="TW"; + $this->generator_version="0.4.1"; + $this->initial_state=0; + $this->trans=array(0=>array("<"=>array(0=>1,1=>0,),),1=>array("ALPHA"=>array(0=>3,1=>1,),"/"=>array(0=>2,1=>0,),"<"=>array(0=>1,1=>1,),"!--"=>array(0=>11,1=>0,),"_ALL"=>array(0=>12,1=>0,),),2=>array("ALPHA"=>array(0=>4,1=>1,),"_ALL"=>array(0=>12,1=>0,),),3=>array("ALPHA"=>array(0=>4,1=>1,),"_ALL"=>array(0=>12,1=>0,),),4=>array("!ALNUM"=>array(0=>5,1=>1,),),5=>array("ALPHA"=>array(0=>6,1=>1,),">"=>array(0=>12,1=>0,),"/>"=>array(0=>12,1=>0,),),6=>array("!ALPHA"=>array(0=>7,1=>1,),">"=>array(0=>12,1=>1,),"/>"=>array(0=>12,1=>1,),),7=>array("\""=>array(0=>8,1=>0,),"'"=>array(0=>9,1=>0,),"ALNUM"=>array(0=>10,1=>1,),">"=>array(0=>12,1=>1,),"/>"=>array(0=>12,1=>1,),),8=>array("\""=>array(0=>12,1=>0,),),9=>array("'"=>array(0=>12,1=>0,),),10=>array("-"=>array(0=>10,1=>0,),"!ALNUM"=>array(0=>12,1=>1,),),11=>array("-->"=>array(0=>12,1=>0,),),); + $this->flags=array(0=>256,1=>1024,2=>1536,3=>1536,4=>3584,5=>0,6=>3584,7=>1536,8=>512,9=>512,10=>512,11=>0,); + $this->delim=array(0=>array(0=>"<",),1=>array(0=>"ALPHA",1=>"/",2=>"<",3=>"!--",4=>"_ALL",),2=>array(0=>"ALPHA",1=>"_ALL",),3=>array(0=>"ALPHA",1=>"_ALL",),4=>array(0=>"!ALNUM",),5=>array(0=>"ALPHA",1=>">",2=>"/>",),6=>array(0=>"!ALPHA",1=>">",2=>"/>",),7=>array(0=>"\"",1=>"'",2=>"ALNUM",3=>">",4=>"/>",),8=>array(0=>"\"",),9=>array(0=>"'",),10=>array(0=>"-",1=>"!ALNUM",),11=>array(0=>"-->",),); + $this->ret=12; + $this->quit=13; + 
$this->names=array(0=>"OUT",1=>"T_tagWall",2=>"T_Cbegin",3=>"T_begin",4=>"T_gettag",5=>"T_in",6=>"A_begin",7=>"V_begin",8=>"VALUE1",9=>"VALUE2",10=>"VALUE3",11=>"HTML_comment",12=>"_RET",13=>"_QUIT",); +} + +/* OUT */ +function isd0 () +{ +$c1=$this->pt[$this->pti]; +if($c1=="<") + return array("<","<"); +return false; +} + +/* T_tagWall */ +function isd1 () +{ +$p=$this->pti; +$c1=$this->pt[$p++]; +$c2=$c1.$this->pt[$p++]; +$c3=$c2.$this->pt[$p]; +if(stristr("eaoinltsrvdukzmcpyhjbfgxwq",$c1)) + return array("ALPHA",$c1); +if($c1=="/") + return array("/","/"); +if($c1=="<") + return array("<","<"); +if($c3=="!--") + return array("!--","!--"); +return array("_ALL",$c1); +} + +/* T_Cbegin */ +function isd2 () +{ +$c1=$this->pt[$this->pti]; +if(stristr("eaoinltsrvdukzmcpyhjbfgxwq",$c1)) + return array("ALPHA",$c1); +return array("_ALL",$c1); +} + +/* T_begin */ +function isd3 () +{ +$c1=$this->pt[$this->pti]; +if(stristr("eaoinltsrvdukzmcpyhjbfgxwq",$c1)) + return array("ALPHA",$c1); +return array("_ALL",$c1); +} + +/* T_gettag */ +function isd4 () +{ +$c1=$this->pt[$this->pti]; +if(!stristr("eaoinltsrvdukzmcpyhjbfgxwq0123456789",$c1)) + return array("!ALNUM",$c1); +return false; +} + +/* T_in */ +function isd5 () +{ +$p=$this->pti; +$c1=$this->pt[$p++]; +$c2=$c1.$this->pt[$p]; +if(stristr("eaoinltsrvdukzmcpyhjbfgxwq",$c1)) + return array("ALPHA",$c1); +if($c1==">") + return array(">",">"); +if($c2=="/>") + return array("/>","/>"); +return false; +} + +/* A_begin */ +function isd6 () +{ +$p=$this->pti; +$c1=$this->pt[$p++]; +$c2=$c1.$this->pt[$p]; +if(!stristr("eaoinltsrvdukzmcpyhjbfgxwq",$c1)) + return array("!ALPHA",$c1); +if($c1==">") + return array(">",">"); +if($c2=="/>") + return array("/>","/>"); +return false; +} + +/* V_begin */ +function isd7 () +{ +$p=$this->pti; +$c1=$this->pt[$p++]; +$c2=$c1.$this->pt[$p]; +if($c1=="\"") + return array("\"","\""); +if($c1=="'") + return array("'","'"); +if(stristr("eaoinltsrvdukzmcpyhjbfgxwq0123456789",$c1)) + return 
array("ALNUM",$c1); +if($c1==">") + return array(">",">"); +if($c2=="/>") + return array("/>","/>"); +return false; +} + +/* VALUE1 */ +function isd8 () +{ +$c1=$this->pt[$this->pti]; +if($c1=="\"") + return array("\"","\""); +return false; +} + +/* VALUE2 */ +function isd9 () +{ +$c1=$this->pt[$this->pti]; +if($c1=="'") + return array("'","'"); +return false; +} + +/* VALUE3 */ +function isd10 () +{ +$c1=$this->pt[$this->pti]; +if($c1=="-") + return array("-","-"); +if(!stristr("eaoinltsrvdukzmcpyhjbfgxwq0123456789",$c1)) + return array("!ALNUM",$c1); +return false; +} + +/* HTML_comment */ +function isd11 () +{ +$p=$this->pti; +$c3=$this->pt[$p++].$this->pt[$p++].$this->pt[$p]; +if($c3=="-->") + return array("-->","-->"); +return false; +} + +} +?> \ No newline at end of file