Source: lib/util/xml_utils.js

  1. /*! @license
  2. * Shaka Player
  3. * Copyright 2016 Google LLC
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. goog.provide('shaka.util.XmlUtils');
  7. goog.require('goog.asserts');
  8. goog.require('shaka.log');
  9. goog.require('shaka.util.Lazy');
  10. goog.require('shaka.util.StringUtils');
  /**
   * @summary A set of XML utility functions.
   *
   * All methods are static and refer to each other through the fully qualified
   * class name rather than through "this", so they can be passed around as
   * plain callbacks (e.g. as the parseFunction argument of parseAttr).
   */
  shaka.util.XmlUtils = class {
    /**
     * Finds a child XML element.
     * @param {!Node} elem The parent XML element.
     * @param {string} name The child XML element's tag name.
     * @return {Element} The child XML element, or null if a child XML element
     *   does not exist with the given tag name OR if there exists more than
     *   one child XML element with the given tag name.
     */
    static findChild(elem, name) {
      const children = shaka.util.XmlUtils.findChildren(elem, name);
      if (children.length != 1) {
        return null;
      }
      return children[0];
    }

    /**
     * Finds a namespace-qualified child XML element.
     * @param {!Node} elem The parent XML element.
     * @param {string} ns The child XML element's namespace URI.
     * @param {string} name The child XML element's local name.
     * @return {Element} The child XML element, or null if a child XML element
     *   does not exist with the given tag name OR if there exists more than
     *   one child XML element with the given tag name.
     */
    static findChildNS(elem, ns, name) {
      const children = shaka.util.XmlUtils.findChildrenNS(elem, ns, name);
      if (children.length != 1) {
        return null;
      }
      return children[0];
    }

    /**
     * Finds child XML elements.  Only direct children are considered.
     * @param {!Node} elem The parent XML element.
     * @param {string} name The child XML element's tag name.
     * @return {!Array.<!Element>} The child XML elements.
     */
    static findChildren(elem, name) {
      const found = [];
      for (const child of elem.childNodes) {
        if (child instanceof Element && child.tagName == name) {
          found.push(child);
        }
      }
      return found;
    }

    /**
     * Gets all direct children of a node that are elements, skipping text,
     * comment and other non-element nodes.
     * @param {!Node} elem the parent XML element.
     * @return {!Array.<!Element>} The child XML elements.
     */
    static getChildren(elem) {
      return Array.from(elem.childNodes).filter((child) => {
        return child instanceof Element;
      });
    }

    /**
     * Finds namespace-qualified child XML elements.  Only direct children are
     * considered.
     * @param {!Node} elem The parent XML element.
     * @param {string} ns The child XML element's namespace URI.
     * @param {string} name The child XML element's local name.
     * @return {!Array.<!Element>} The child XML elements.
     */
    static findChildrenNS(elem, ns, name) {
      const found = [];
      for (const child of elem.childNodes) {
        if (child instanceof Element && child.localName == name &&
            child.namespaceURI == ns) {
          found.push(child);
        }
      }
      return found;
    }

    /**
     * Gets a namespace-qualified attribute.
     * @param {!Element} elem The element to get from.
     * @param {string} ns The namespace URI.
     * @param {string} name The local name of the attribute.
     * @return {?string} The attribute's value, or null if not present.
     */
    static getAttributeNS(elem, ns, name) {
      // Some browsers return the empty string when the attribute is missing,
      // so check if it exists first. See: https://mzl.la/2L7F0UK
      return elem.hasAttributeNS(ns, name) ?
          elem.getAttributeNS(ns, name) : null;
    }

    /**
     * Gets a namespace-qualified attribute, trying several namespaces in
     * order and returning the value from the first one that has it.
     * @param {!Element} elem The element to get from.
     * @param {!Array.<string>} nsList The list of namespace URIs.
     * @param {string} name The local name of the attribute.
     * @return {?string} The attribute's value, or null if not present.
     */
    static getAttributeNSList(elem, nsList, name) {
      // Some browsers return the empty string when the attribute is missing,
      // so check if it exists first. See: https://mzl.la/2L7F0UK
      for (const ns of nsList) {
        if (elem.hasAttributeNS(ns, name)) {
          return elem.getAttributeNS(ns, name);
        }
      }
      return null;
    }

    /**
     * Gets the text contents of a node.
     * @param {!Node} elem The XML element.
     * @return {?string} The trimmed text contents, or null if the node has
     *   any child that is not a text or CDATA node.  A node without children
     *   yields the empty string, not null.
     */
    static getContents(elem) {
      const XmlUtils = shaka.util.XmlUtils;
      if (!Array.from(elem.childNodes).every(XmlUtils.isText)) {
        return null;
      }

      // Read merged text content from all text nodes.
      return elem.textContent.trim();
    }

    /**
     * Checks if a node is of type text.
     * @param {!Node} elem The XML element.
     * @return {boolean} True if it is a text node or a CDATA section.
     */
    static isText(elem) {
      return elem.nodeType == Node.TEXT_NODE ||
          elem.nodeType == Node.CDATA_SECTION_NODE;
    }

    /**
     * Parses an attribute by its name.
     * @param {!Element} elem The XML element.
     * @param {string} name The attribute name.
     * @param {function(string): (T|null)} parseFunction A function that parses
     *   the attribute.
     * @param {(T|null)=} defaultValue The attribute's default value, if not
     *   specified, the attribute's default value is null.
     * @return {(T|null)} The parsed attribute on success, or the attribute's
     *   default value if the attribute does not exist or could not be parsed.
     * @template T
     */
    static parseAttr(
        elem, name, parseFunction, defaultValue = null) {
      let parsedValue = null;

      const value = elem.getAttribute(name);
      if (value != null) {
        parsedValue = parseFunction(value);
      }
      // The loose comparison is deliberate: a parser returning undefined is
      // treated the same as one returning null.
      return parsedValue == null ? defaultValue : parsedValue;
    }

    /**
     * Parses an XML date string.
     * @param {string} dateString
     * @return {?number} The parsed date in seconds on success; otherwise,
     *   return null.
     */
    static parseDate(dateString) {
      if (!dateString) {
        return null;
      }

      // Times in the manifest should be in UTC. If they don't specify a
      // timezone, Date.parse() will use the local timezone instead of UTC.
      // So manually add the timezone if missing ('Z' indicates the UTC
      // timezone).
      // Format: YYYY-MM-DDThh:mm:ss.ssssss
      if (/^\d+-\d+-\d+T\d+:\d+:\d+(\.\d+)?$/.test(dateString)) {
        dateString += 'Z';
      }

      // Date.parse() returns milliseconds; callers expect seconds.
      const result = Date.parse(dateString);
      return Number.isNaN(result) ? null : (result / 1000.0);
    }

    /**
     * Parses an XML duration string.
     * Negative values are not supported. Years and months are treated as
     * exactly 365 and 30 days respectively.
     * @param {string} durationString The duration string, e.g., "PT1H3M43.2S",
     *   which means 1 hour, 3 minutes, and 43.2 seconds.
     * @return {?number} The parsed duration in seconds on success; otherwise,
     *   return null.
     * @see {@link http://www.datypic.com/sc/xsd/t-xsd_duration.html}
     */
    static parseDuration(durationString) {
      if (!durationString) {
        return null;
      }

      const re = '^P(?:([0-9]*)Y)?(?:([0-9]*)M)?(?:([0-9]*)D)?' +
                 '(?:T(?:([0-9]*)H)?(?:([0-9]*)M)?(?:([0-9.]*)S)?)?$';
      const matches = new RegExp(re).exec(durationString);

      if (!matches) {
        shaka.log.warning('Invalid duration string:', durationString);
        return null;
      }

      // Note: Number(null) == 0 but Number(undefined) == NaN.
      // The "|| null" therefore turns both a missing component (undefined)
      // and an empty one ('') into 0.
      const years = Number(matches[1] || null);
      const months = Number(matches[2] || null);
      const days = Number(matches[3] || null);
      const hours = Number(matches[4] || null);
      const minutes = Number(matches[5] || null);
      const seconds = Number(matches[6] || null);

      // Assume a year always has 365 days and a month always has 30 days.
      const d = (60 * 60 * 24 * 365) * years +
                (60 * 60 * 24 * 30) * months +
                (60 * 60 * 24) * days +
                (60 * 60) * hours +
                60 * minutes +
                seconds;
      // A malformed seconds component such as "1.2.3" passes the regex but
      // converts to NaN; report that as a failure.
      return Number.isFinite(d) ? d : null;
    }

    /**
     * Parses a range string.
     * The pattern is not anchored, so the first "digits-digits" sequence found
     * anywhere in the string is used.
     * @param {string} rangeString The range string, e.g., "101-9213".
     * @return {?{start: number, end: number}} The parsed range on success;
     *   otherwise, return null.
     */
    static parseRange(rangeString) {
      const matches = /([0-9]+)-([0-9]+)/.exec(rangeString);

      if (!matches) {
        return null;
      }

      const start = Number(matches[1]);
      if (!Number.isFinite(start)) {
        return null;
      }

      const end = Number(matches[2]);
      if (!Number.isFinite(end)) {
        return null;
      }

      return {start: start, end: end};
    }

    /**
     * Converts a numeric string to a number.
     *
     * Number() coerces the empty string and whitespace-only strings to 0,
     * which would make a blank attribute value look like a valid number.  This
     * helper maps those inputs to NaN so that the parse* methods reject them.
     * Every other input is converted exactly as Number() would convert it.
     *
     * @param {string} numberString The string to convert.
     * @return {number} The converted number, or NaN if the string is blank or
     *   is not a numeric literal.
     * @private
     */
    static toNumber_(numberString) {
      if (typeof numberString == 'string' && numberString.trim() === '') {
        return NaN;
      }
      return Number(numberString);
    }

    /**
     * Parses an integer.
     * @param {string} intString The integer string.
     * @return {?number} The parsed integer on success; otherwise, return null.
     *   Blank strings are not integers and yield null.
     */
    static parseInt(intString) {
      const n = shaka.util.XmlUtils.toNumber_(intString);
      // NaN % 1 and Infinity % 1 are both NaN, so they are rejected here too.
      return (n % 1 === 0) ? n : null;
    }

    /**
     * Parses a positive integer.
     * @param {string} intString The integer string.
     * @return {?number} The parsed positive integer on success; otherwise,
     *   return null.
     */
    static parsePositiveInt(intString) {
      const n = shaka.util.XmlUtils.toNumber_(intString);
      return (n % 1 === 0) && (n > 0) ? n : null;
    }

    /**
     * Parses a non-negative integer.
     * @param {string} intString The integer string.
     * @return {?number} The parsed non-negative integer on success; otherwise,
     *   return null.  Blank strings yield null rather than 0.
     */
    static parseNonNegativeInt(intString) {
      const n = shaka.util.XmlUtils.toNumber_(intString);
      return (n % 1 === 0) && (n >= 0) ? n : null;
    }

    /**
     * Parses a floating point number.
     * @param {string} floatString The floating point number string.
     * @return {?number} The parsed floating point number on success;
     *   otherwise, return null. May return -Infinity or Infinity.  Blank
     *   strings yield null rather than 0.
     */
    static parseFloat(floatString) {
      const n = shaka.util.XmlUtils.toNumber_(floatString);
      return !Number.isNaN(n) ? n : null;
    }

    /**
     * Parses a boolean.
     * @param {string} booleanString The boolean string.
     * @return {boolean} True only if the string is "true" in any letter case;
     *   false for everything else, including an empty string.
     */
    static parseBoolean(booleanString) {
      if (!booleanString) {
        return false;
      }
      return booleanString.toLowerCase() === 'true';
    }

    /**
     * Evaluate a division expressed as a string.
     * @param {string} exprString
     *   The expression to evaluate, e.g. "200/2". Can also be a single number.
     * @return {?number} The evaluated expression as floating point number on
     *   success; otherwise return null.  A non-zero numerator over a zero
     *   denominator yields Infinity; "0/0" and blank strings yield null.
     */
    static evalDivision(exprString) {
      const res = exprString.match(/^(\d+)\/(\d+)$/);
      const n = res ?
          Number(res[1]) / Number(res[2]) :
          shaka.util.XmlUtils.toNumber_(exprString);
      return !Number.isNaN(n) ? n : null;
    }

    /**
     * Parse a string and return the resulting root element if it was valid
     * XML.
     *
     * @param {string} xmlString
     * @param {string} expectedRootElemName The tag name the root element must
     *   have.
     * @return {Element} The root element, or null if the string could not be
     *   parsed, the document is empty, the root element has a different tag
     *   name, or the document contains HTML or SVG elements.
     */
    static parseXmlString(xmlString, expectedRootElemName) {
      const parser = new DOMParser();
      const unsafeXmlString =
          shaka.util.XmlUtils.trustedHTMLFromString_.value()(xmlString);
      let unsafeXml = null;
      try {
        unsafeXml = parser.parseFromString(unsafeXmlString, 'text/xml');
      } catch (exception) {
        shaka.log.error('XML parsing exception:', exception);
        return null;
      }

      // According to MDN, parseFromString never returns null.
      goog.asserts.assert(unsafeXml, 'Parsed XML document cannot be null!');

      // Check for empty documents.
      const rootElem = unsafeXml.documentElement;
      if (!rootElem) {
        shaka.log.error('XML document was empty!');
        return null;
      }

      // Check for parser errors.
      const parserErrorElements = rootElem.getElementsByTagName('parsererror');
      if (parserErrorElements.length) {
        shaka.log.error('XML parser error found:', parserErrorElements[0]);
        return null;
      }

      // The top-level element in the loaded XML should have the name we
      // expect.
      if (rootElem.tagName != expectedRootElemName) {
        shaka.log.error(
            `XML tag name does not match expected "${expectedRootElemName}":`,
            rootElem.tagName);
        return null;
      }

      // SECURITY: Verify that the document does not contain elements from the
      // HTML or SVG namespaces, which could trigger script execution and XSS.
      const iterator = document.createNodeIterator(
          unsafeXml, NodeFilter.SHOW_ALL);
      for (let node = iterator.nextNode(); node; node = iterator.nextNode()) {
        if (node instanceof HTMLElement || node instanceof SVGElement) {
          shaka.log.error('XML document embeds unsafe content!');
          return null;
        }
      }

      return rootElem;
    }

    /**
     * Parse some data (auto-detecting the encoding) and return the resulting
     * root element if it was valid XML.
     * @param {BufferSource} data
     * @param {string} expectedRootElemName
     * @return {Element} The root element, or null if decoding or parsing
     *   failed.
     */
    static parseXml(data, expectedRootElemName) {
      try {
        const string = shaka.util.StringUtils.fromBytesAutoDetect(data);
        return shaka.util.XmlUtils.parseXmlString(string, expectedRootElemName);
      } catch (exception) {
        // Both the decoding step and the parsing step end up here.
        shaka.log.error('parseXmlString threw!', exception);
        return null;
      }
    }
  };
  /**
   * Lazily-built converter that promotes a string to TrustedHTML where the
   * Trusted Types API is available, and hands the string back untouched where
   * it is not.
   *
   * This is security-sensitive: the policy performs no sanitization, so it
   * should only be used with security approval on strings that are guaranteed
   * not to cause an XSS vulnerability.
   *
   * @private {!shaka.util.Lazy.<function(!string): (!TrustedHTML|!string)>}
   */
  shaka.util.XmlUtils.trustedHTMLFromString_ = new shaka.util.Lazy(() => {
    const passThrough = (input) => input;

    // Environments without Trusted Types simply keep working with strings.
    if (typeof trustedTypes === 'undefined') {
      return passThrough;
    }

    // Register the policy used to promote the string to TrustedHTML.  This
    // factory is wrapped in shaka.util.Lazy so that the policy is only
    // created once.
    const xmlPolicy = trustedTypes.createPolicy('shaka-player#xml', {
      createHTML: passThrough,
    });
    return (input) => xmlPolicy.createHTML(input);
  });