<?php
namespace Drupal\Component\Utility ;
/**
 * Provides user agent related utility functions.
 *
 * @ingroup utility
 */
class UserAgent {

  /**
   * Identifies user agent language from the Accept-language HTTP header.
   *
   * The algorithm works as follows:
   * - map user agent language codes to available language codes.
   * - order all user agent language codes by qvalue from low to high, so the
   *   lowest-ranked specific tag is the one that seeds a missing generic tag.
   * - add generic user agent language codes if they aren't already specified
   *   but with a slightly lower qvalue.
   * - find the most specific available language code with the highest qvalue.
   * - if 2 or more languages are having the same qvalue, respect the order of
   *   them inside the $langcodes array (the first one wins).
   *
   * We perform user agent accept-language parsing only if page cache is
   * disabled, otherwise we would cache a user-specific preference.
   *
   * @param string $http_accept_language
   *   The value of the "Accept-Language" HTTP header.
   * @param array $langcodes
   *   An array of available language codes to pick from. Matching is case
   *   insensitive, but the returned value keeps the case used in this array.
   * @param array $mappings
   *   (optional) Custom mappings to support user agents that are sending non
   *   standard language codes, keyed by the lowercase user agent language code
   *   with the standard language code as value. No mapping is assumed by
   *   default.
   *
   * @return string|false
   *   The selected language code or FALSE if no valid language can be
   *   identified.
   */
  public static function getBestMatchingLangcode($http_accept_language, $langcodes, $mappings = array()) {
    // The Accept-Language header contains information about the language
    // preferences configured in the user's user agent / operating system.
    // RFC 2616 (section 14.4) defines the Accept-Language header as follows:
    //   Accept-Language = "Accept-Language" ":"
    //                  1#( language-range [ ";" "q" "=" qvalue ] )
    //   language-range  = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
    // Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5"
    $ua_langcodes = array();
    if (preg_match_all('@(?<=[, ]|^)([a-zA-Z-]+|\*)(?:;q=([0-9.]+))?(?:$|\s*,\s*)@', trim($http_accept_language), $matches, PREG_SET_ORDER)) {
      foreach ($matches as $match) {
        // We can safely use strtolower() here, tags are ASCII.
        $langcode = strtolower($match[1]);

        // Resolve custom mappings with a keyed lookup instead of comparing
        // against every mapping with '==': a loose comparison lets an
        // integer-keyed mapping match any alphabetic language code on
        // PHP < 8.
        if (isset($mappings[$langcode])) {
          $langcode = strtolower($mappings[$langcode]);
        }

        // With PREG_SET_ORDER the optional qvalue group is absent from the
        // match when the range carries no ";q=" part; it then defaults to 1.
        $qvalue = isset($match[2]) ? (float) $match[2] : 1;

        // RFC2616 mandates that the decimal part is no more than three digits,
        // so we multiply the qvalue by 1000 to avoid floating point
        // comparisons.
        // Take the highest qvalue for this langcode. Although the request
        // supposedly contains unique langcodes, our mapping possibly resolves
        // to the same langcode for different qvalues. Keep the highest.
        $ua_langcodes[$langcode] = max(
          (int) ($qvalue * 1000),
          (isset($ua_langcodes[$langcode]) ? $ua_langcodes[$langcode] : 0)
        );
      }
    }

    // We should take pristine values from the HTTP headers, but Internet
    // Explorer from version 7 sends only specific language tags (eg. fr-CA)
    // without the corresponding generic tag (fr) unless explicitly configured.
    // In that case, we assume that the lowest value of the specific tags is the
    // value of the generic language to be as close to the HTTP 1.1 spec as
    // possible.
    // See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4 and
    // http://blogs.msdn.com/b/ie/archive/2006/10/17/accept-language-header-for-internet-explorer-7.aspx
    asort($ua_langcodes);
    foreach ($ua_langcodes as $langcode => $qvalue) {
      // For Chinese languages the generic tag is either zh-hans or zh-hant, so
      // we need to handle this separately, we can not split $langcode on the
      // first occurrence of '-' otherwise we get a non-existing language zh.
      // All other languages use a langcode without a '-', so we can safely
      // split on the first occurrence of it.
      $chinese_script = substr($langcode, 0, 7);
      if (strlen($langcode) > 7 && in_array($chinese_script, array('zh-hant', 'zh-hans'), TRUE)) {
        $generic_tag = $chinese_script;
      }
      else {
        // strtok() returns FALSE when there is no token (e.g. a bare '-'),
        // which the empty() check below filters out.
        $generic_tag = strtok($langcode, '-');
      }

      if (!empty($generic_tag) && !isset($ua_langcodes[$generic_tag])) {
        // Add the generic langcode, but make sure it has a lower qvalue as the
        // more specific one, so the more specific one gets selected if it's
        // defined by both the user agent and us. The 0.1 offset is smaller
        // than one step of the scaled integer qvalues.
        $ua_langcodes[$generic_tag] = $qvalue - 0.1;
      }
    }

    // Find the available language with the greatest qvalue, following the
    // rules of RFC 2616 (section 14.4). If several languages have the same
    // qvalue, the one listed first in $langcodes is kept because only a
    // strictly greater qvalue replaces the current best match.
    $best_match_langcode = FALSE;
    $max_qvalue = 0;
    foreach ($langcodes as $langcode_case_sensitive) {
      // Language tags are case insensitive (RFC2616, sec 3.10).
      $langcode = strtolower($langcode_case_sensitive);

      // If nothing matches below, the default qvalue is the one of the wildcard
      // language, if set, or is 0 (which will never match).
      $qvalue = isset($ua_langcodes['*']) ? $ua_langcodes['*'] : 0;

      // Find the longest possible prefix of the user agent supplied language
      // ('the language-range') that matches this site language ('the language
      // tag'), dropping one trailing '-subtag' per iteration.
      $prefix = $langcode;
      while (TRUE) {
        if (isset($ua_langcodes[$prefix])) {
          $qvalue = $ua_langcodes[$prefix];
          break;
        }
        $separator = strrpos($prefix, '-');
        // Stop once there is no subtag left to strip, or stripping would
        // leave an empty prefix.
        if ($separator === FALSE || $separator === 0) {
          break;
        }
        $prefix = substr($prefix, 0, $separator);
      }

      // Find the best match.
      if ($qvalue > $max_qvalue) {
        $best_match_langcode = $langcode_case_sensitive;
        $max_qvalue = $qvalue;
      }
    }

    return $best_match_langcode;
  }

}