1 <?php
2 /**
3 * The content negotiator performs "text/html" or "application/xhtml+xml" switching.
4 * It does this through the static function ContentNegotiator::process().
 * By default, ContentNegotiator will comply with the Accept headers the client
 * sends along with the HTTP request, which is most likely "application/xhtml+xml"
 * (see "Order of selection" below).
8 *
9 * Order of selection between html or xhtml is as follows:
10 * - if PHP has already sent the HTTP headers, default to "html" (we can't send HTTP Content-Type headers any longer)
11 * - if a GET variable ?forceFormat is set, it takes precedence (for testing purposes)
12 * - if the user agent is detected as W3C Validator we always deliver "xhtml"
13 * - if an HTTP Accept header is sent from the client, we respect its order (this is the most common case)
14 * - if none of the above matches, fallback is "html"
15 *
16 * ContentNegotiator doesn't enable you to send content as a true XML document
17 * through the "text/xml" or "application/xhtml+xml" Content-Type.
18 * Please see http://webkit.org/blog/68/understanding-html-xml-and-xhtml/ for further information.
19 *
20 * @package sapphire
21 * @subpackage control
22 * @see http://doc.silverstripe.org/doku.php?id=xhtml-support
23 * @see http://doc.silverstripe.org/doku.php?id=contentnegotiator
24 * @see http://doc.silverstripe.org/doku.php?id=html
25 *
26 * @todo Check for correct XHTML doctype in xhtml()
27 * @todo Allow for other HTML4 doctypes (e.g. Transitional) in html()
28 * @todo Make content replacement and doctype setting two separately configurable behaviours - some
29 * devs might know what they're doing and don't want contentnegotiator messing with their HTML4 doctypes,
30 * but still find it useful to have self-closing tags removed.
31 */
class ContentNegotiator {

    /**
     * Character set appended to the Content-Type header written by html() and xhtml().
     *
     * @var string
     */
    protected static $encoding = 'utf-8';

    /**
     * When true, negotiation applies to every HTML/XHTML response; when false,
     * only to responses whose body starts with an XML declaration.
     *
     * @var bool
     */
    protected static $enabled = false;

    /**
     * Set the character set encoding for this page. By default it's utf-8, but you could
     * change it to, say, windows-1252, to improve interoperability with extended characters
     * being imported from Windows Excel.
     *
     * @param string $encoding
     */
    public static function set_encoding($encoding) {
        self::$encoding = $encoding;
    }

    /**
     * Return the character encoding set by ContentNegotiator::set_encoding(). It's recommended
     * that all classes that need to specify the character set make use of this function.
     *
     * @return string
     */
    public static function get_encoding() {
        return self::$encoding;
    }

    /**
     * Enable content negotiation for all templates, not just those with the xml header.
     */
    public static function enable() {
        self::$enabled = true;
    }

    /**
     * Restore the default behaviour: content negotiation is only applied to
     * templates that start with the xml header.
     */
    public static function disable() {
        self::$enabled = false;
    }

    /**
     * Returns true if negotiation is enabled for the given response.
     * By default, negotiation is only enabled for pages that have the xml header.
     *
     * @param SS_HTTPResponse $response Only getHeader() and getBody() are called on it.
     * @return bool
     */
    public static function enabled_for($response) {
        $contentType = $response->getHeader("Content-Type");

        // Disable content negotiation for other content types
        if($contentType
            && substr($contentType, 0, 9) !== 'text/html'
            && substr($contentType, 0, 21) !== 'application/xhtml+xml'
        ) {
            return false;
        }

        if(self::$enabled) return true;

        return self::has_xml_header($response->getBody());
    }

    /**
     * Choose between "html" and "xhtml" for the given response and rewrite its
     * headers and body accordingly. Does nothing if negotiation is not enabled
     * for the response (see enabled_for()).
     *
     * Order of selection:
     * - headers already sent: "html"
     * - a valid ?forceFormat GET variable ("html" or "xhtml")
     * - W3C Validator user agent: "xhtml"
     * - highest quality value in the HTTP Accept header
     * - otherwise "html"
     *
     * @param SS_HTTPResponse $response
     */
    public static function process(SS_HTTPResponse $response) {
        if(!self::enabled_for($response)) return;

        // Order matters: on equal quality values the first entry wins.
        $mimes = array(
            "xhtml" => "application/xhtml+xml",
            "html" => "text/html",
        );

        if(headers_sent()) {
            $chosenFormat = "html";

        } else if(
            isset($_GET['forceFormat'])
            && is_string($_GET['forceFormat'])
            && isset($mimes[$_GET['forceFormat']])
        ) {
            // Only known formats are honoured. The raw request value must never be used
            // as a method name, otherwise any method of this class becomes callable from the URL.
            $chosenFormat = $_GET['forceFormat'];

        } else if(
            isset($_SERVER['HTTP_USER_AGENT'])
            && substr($_SERVER['HTTP_USER_AGENT'], 0, 14) === 'W3C_Validator/'
        ) {
            // The W3C validator doesn't send an HTTP_ACCEPT header, but it can support xhtml.
            // We put this special case in here so that designers don't get worried that
            // their templates are HTML4.
            $chosenFormat = "xhtml";

        } else {
            $chosenFormat = null;
            if(isset($_SERVER['HTTP_ACCEPT'])) {
                $chosenFormat = self::format_for_accept_header($_SERVER['HTTP_ACCEPT'], $mimes);
            }
            if($chosenFormat === null) $chosenFormat = "html";
        }

        $negotiator = new ContentNegotiator();
        if($chosenFormat === "xhtml") {
            $negotiator->xhtml($response);
        } else {
            $negotiator->html($response);
        }
    }

    /**
     * Pick the format with the highest quality value from an HTTP Accept header.
     * A media type without a q parameter counts as q=1; a media type with q=0 is
     * treated as not acceptable. On equal quality the earlier entry in $mimes wins.
     *
     * @param string $acceptHeader Raw value of the Accept header
     * @param array $mimes Map of format name to mime type
     * @return string|null Format name, or null if no listed mime type is acceptable
     */
    protected static function format_for_accept_header($acceptHeader, $mimes) {
        $bestFormat = null;
        $bestQuality = 0.0;

        foreach($mimes as $format => $mime) {
            $regExp = '/' . preg_quote($mime, '/') . '(?:\s*;\s*q=(\d+(?:\.\d+)?))?/i';
            if(!preg_match($regExp, (string)$acceptHeader, $matches)) continue;

            $quality = isset($matches[1]) ? (float)$matches[1] : 1.0;

            // Strict comparison keeps the first format on ties and skips q=0 entries.
            if($quality > $bestQuality) {
                $bestQuality = $quality;
                $bestFormat = $format;
            }
        }

        return $bestFormat;
    }

    /**
     * Whether the given content starts with an XML declaration.
     *
     * @param string $content
     * @return bool
     */
    protected static function has_xml_header($content) {
        return substr((string)$content, 0, 5) === '<' . '?xml';
    }

    /**
     * preg_replace() wrapper that returns the unmodified content when PCRE reports
     * an error (preg_replace() returns null in that case), so that a failed
     * replacement degrades to untouched markup instead of an empty response body.
     *
     * @param string $pattern
     * @param string $replacement
     * @param string $content
     * @return string
     */
    protected static function regex_replace($pattern, $replacement, $content) {
        $result = preg_replace($pattern, $replacement, $content);
        return ($result === null) ? $content : $result;
    }

    /**
     * Only sends the HTTP Content-Type as "application/xhtml+xml"
     * if the template starts with the typical "<?xml" Pragma; otherwise
     * the response is handed to html().
     * Assumes that a correct doctype is set, and doesn't change or append to it.
     * Replaces a few common tags and entities with their XHTML representations
     * (<br>, <img>, &nbsp;).
     *
     * The response is modified in place; nothing is returned.
     *
     * @param SS_HTTPResponse $response
     * @todo More flexible tag and entity parsing through regular expressions or tag definition lists
     */
    public function xhtml(SS_HTTPResponse $response) {
        $content = (string)$response->getBody();

        // Only serve "pure" XHTML if the XML header is present
        if(!self::has_xml_header($content)) {
            $this->html($response);
            return;
        }

        $response->addHeader("Content-Type", "application/xhtml+xml; charset=" . self::$encoding);
        $response->addHeader("Vary", "Accept");

        // Fix base tag: undo the IE conditional comment that html() wraps around </base>
        $content = self::regex_replace(
            '/<base href="([^"]*)"><!--\[if[^\]]*\]><\/base><!\[endif\]-->/',
            '<base href="$1"></base>',
            $content
        );

        // &nbsp; is replaced by its numeric character reference
        $content = str_replace('&nbsp;', '&#160;', $content);
        $content = str_replace('<br>', '<br />', $content);

        // Self-close <img> tags that are not already closed
        $content = self::regex_replace('/(<img[^>]*[^\/>])>/i', '$1/>', $content);

        $response->setBody($content);
    }

    /**
     * Sends HTTP Content-Type as "text/html", and replaces existing doctypes with
     * HTML4.01 Strict (only in templates that start with the "<?xml" Pragma).
     * Replaces self-closing tags like <img /> with unclosed solitary tags like <img>.
     * Replaces all occurrences of "application/xhtml+xml" with "text/html" in the template.
     * Removes "xmlns" attributes and any <?xml> Pragmas.
     *
     * The response is modified in place; nothing is returned.
     *
     * @param SS_HTTPResponse $response
     */
    public function html(SS_HTTPResponse $response) {
        $response->addHeader("Content-Type", "text/html; charset=" . self::$encoding);
        $response->addHeader("Vary", "Accept");

        $content = (string)$response->getBody();
        $hasXMLHeader = self::has_xml_header($content);

        // Fix base tag
        $content = self::regex_replace(
            '/<base href="([^"]*)"><\/base>/',
            '<base href="$1"><!--[if lte IE 6]></base><![endif]-->',
            $content
        );

        // Strip the XML declaration together with one trailing newline
        $content = self::regex_replace('/<\?xml[^>]+\?>\n?/', '', $content);
        $content = str_replace(
            array('/>', 'xml:lang', 'application/xhtml+xml'),
            array('>', 'lang', 'text/html'),
            $content
        );

        // Only replace the doctype in templates with the xml header
        if($hasXMLHeader) {
            $content = self::regex_replace(
                '/<!DOCTYPE[^>]+>/',
                '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">',
                $content
            );
        }
        $content = self::regex_replace('/<html xmlns="[^"]+"/', '<html ', $content);

        $response->setBody($content);
    }

}