/**
 * Copyright (C) 2012-2013 KO GmbH <copyright@kogmbh.com>
 *
 * @licstart
 * This file is part of WebODF.
 *
 * WebODF is free software: you can redistribute it and/or modify it
 * under the terms of the GNU Affero General Public License (GNU AGPL)
 * as published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * WebODF is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with WebODF.  If not, see <http://www.gnu.org/licenses/>.
 * @licend
 *
 * @source: http://www.webodf.org/
 * @source: https://github.com/kogmbh/WebODF/
 */

/*global Node, runtime, odf, NodeFilter, core*/

/**
 * Helper functions for classifying ODF DOM nodes (paragraphs, spans,
 * character elements, annotations, ...), for walking text content inside a
 * paragraph, for deciding whether whitespace is significant, for parsing
 * length attributes, and for collecting nodes that intersect a DOM range.
 * All helpers are attached to the instance created by this constructor.
 * @constructor
 */
odf.OdfUtils = function OdfUtils() {
    "use strict";

    var /**@const
           @type{!string}*/
        textns = odf.Namespaces.textns,
        /**@const
           @type{!string}*/
        drawns = odf.Namespaces.drawns,
        /**@const
           @type{!string}*/
        xlinkns = odf.Namespaces.xlinkns,
        domUtils = new core.DomUtils(),
        // only add odf element namespaces here.
        // Namespaces solely used for attributes are excluded. eg. fo, xlink & xml
        odfNodeNamespaceMap = [
            odf.Namespaces.dbns,
            odf.Namespaces.dcns,
            odf.Namespaces.dr3dns,
            odf.Namespaces.drawns,
            odf.Namespaces.chartns,
            odf.Namespaces.formns,
            odf.Namespaces.numberns,
            odf.Namespaces.officens,
            odf.Namespaces.presentationns,
            odf.Namespaces.stylens,
            odf.Namespaces.svgns,
            odf.Namespaces.tablens,
            odf.Namespaces.textns
        ];

    /**
     * Determine if the node is a draw:image element.
     * @param {?Node} e
     * @return {!boolean}
     */
    function isImage(e) {
        var name = e && e.localName;
        return name === "image" && e.namespaceURI === drawns;
    }
    this.isImage = isImage;

    /**
     * Determine if the node is a draw:frame element and has its
     * text:anchor-type attribute set to 'as-char'.
     * @param {?Node} e
     * @return {!boolean}
     */
    function isCharacterFrame(e) {
        // TODO the anchor-type can be defined on any style associated with the frame
        return e !== null && e.nodeType === Node.ELEMENT_NODE
            && e.localName === "frame" && e.namespaceURI === drawns
            && /**@type{!Element}*/(e).getAttributeNS(textns, "anchor-type")
                === "as-char";
    }
    this.isCharacterFrame = isCharacterFrame;

    /**
     * Determine if the node is an office:annotation element.
     * @param {?Node} e
     * @return {!boolean}
     */
    function isAnnotation(e) {
        var name = e && e.localName;
        return name === "annotation" && e.namespaceURI === odf.Namespaces.officens;
    }

    /**
     * Determine if the node is an annotation wrapper element, i.e. a div
     * whose className is exactly "annotationWrapper".
     * @param {?Node} e
     * @return {!boolean}
     */
    function isAnnotationWrapper(e) {
        var name = e && e.localName;
        return name === "div" && /**@type{!HTMLDivElement}*/(e).className === "annotationWrapper";
    }

    /**
     * Determine if the node is an inline 'root' type,
     * i.e. an office:annotation or a wrapper for an annotation.
     * @param {?Node} e
     * @return {!boolean}
     */
    function isInlineRoot(e) {
        return isAnnotation(e)
            || isAnnotationWrapper(e);
    }
    this.isInlineRoot = isInlineRoot;

    /**
     * Determine if the node is a text:span element.
     * @param {?Node} e
     * @return {!boolean}
     */
    this.isTextSpan = function (e) {
        var name = e && e.localName;
        return name === "span" && e.namespaceURI === textns;
    };

    /**
     * Determine if the node is a text:a element.
     * @param {?Node} node
     * @return {!boolean}
     */
    function isHyperlink(node) {
        var name = node && node.localName;
        return name === "a" && node.namespaceURI === textns;
    }
    this.isHyperlink = isHyperlink;

    /**
     * Gets the xlink:href attribute of a text:a element.
     * An absent attribute yields the empty string.
     * @param {!Element} element
     * @return {!string}
     */
    this.getHyperlinkTarget = function (element) {
        return element.getAttributeNS(xlinkns, 'href') || "";
    };

    /**
     * Determine if the node is a text:p or a text:h element.
     * @param {?Node} e
     * @return {!boolean}
     */
    function isParagraph(e) {
        var name = e && e.localName;
        return (name === "p" || name === "h") && e.namespaceURI === textns;
    }
    this.isParagraph = isParagraph;

    /**
     * Find the paragraph containing the specified node. If an offset is provided and
     * the node has a child at the specified offset, this will be included in the search
     * as well if the supplied node is not a paragraph itself.
     * @param {?Node} node
     * @param {!number=} offset
     * @return {?Element} the paragraph, or null if no ancestor is a paragraph
     */
    function getParagraphElement(node, offset) {
        if (node && offset !== undefined && !isParagraph(node) && node.childNodes.item(offset)) {
            node = node.childNodes.item(offset);
        }
        while (node && !isParagraph(node)) {
            node = node.parentNode;
        }
        return /**@type{?Element}*/(node);
    }
    this.getParagraphElement = getParagraphElement;

    /**
     * Returns true if the specified node is contained within a text:tracked-changes parent
     * @param {Node} node Node to start searching from
     * @param {!Node} container Root container to stop searching at. This helps set the boundary of the current
     *  search and will usually be the root level element node (e.g., office:text, office:presentation, etc.)
     * @return {!boolean}
     */
    this.isWithinTrackedChanges = function (node, container) {
        while (node && node !== container) {
            if (node.namespaceURI === textns && node.localName === 'tracked-changes') {
                return true;
            }
            node = node.parentNode;
        }
        return false;
    };

    /**
     * Find the closest office:annotation that is the node itself or one of
     * its ancestors, without searching at or beyond the container.
     * @param {?Node} node Node to start searching with
     * @param {!Element} container Root container to stop searching at.
     * @return {?Element}
     */
    function getParentAnnotation(node, container) {
        while (node && node !== container) {
            if (node.namespaceURI === odf.Namespaces.officens && node.localName === 'annotation') {
                return /**@type{!Element}*/(node);
            }
            node = node.parentNode;
        }
        return null;
    }
    this.getParentAnnotation = getParentAnnotation;

    /**
     * Returns true if getParentAnnotation finds an annotation for the node.
     * @param {?Node} node Node to start searching with
     * @param {!Element} container Root container to stop searching at.
     * @return {!boolean}
     */
    this.isWithinAnnotation = function (node, container) {
        return Boolean(getParentAnnotation(node, container));
    };

    /**
     * Gets the creator of an annotation, i.e. the text content of the first
     * dc:creator descendant. Returns the empty string if the annotation has
     * no dc:creator element (previously this case raised a TypeError).
     * @param {!Element} annotationElement
     * @return {!string}
     */
    this.getAnnotationCreator = function (annotationElement) {
        var creatorElement = annotationElement.getElementsByTagNameNS(odf.Namespaces.dcns, "creator")[0];
        return creatorElement ? creatorElement.textContent : "";
    };

    /**
     * Determine if the node is a text:list-item element.
     * @param {?Node} e
     * @return {!boolean}
     */
    this.isListItem = function (e) {
        var name = e && e.localName;
        return name === "list-item" && e.namespaceURI === textns;
    };

    /**
     * Determine if the node is a text:line-break element.
     * @param {?Node} e
     * @return {!boolean}
     */
    this.isLineBreak = function (e) {
        var name = e && e.localName;
        return name === "line-break" && e.namespaceURI === textns;
    };

    /**
     * Determine if the text consists entirely of whitespace characters
     * (space, tab, carriage return, line feed).
     * At least one whitespace is required.
     * @param {!string} text
     * @return {!boolean}
     */
    function isODFWhitespace(text) {
        return (/^[ \t\r\n]+$/).test(text);
    }
    this.isODFWhitespace = isODFWhitespace;

    /**
     * Determine if the node is a grouping element: a text:span, text:p,
     * text:h, text:a or text:meta element, or any span element whose
     * className is exactly "webodf-annotationHighlight".
     * @param {?Node} n
     * @return {!boolean}
     */
    function isGroupingElement(n) {
        if (n === null || n.nodeType !== Node.ELEMENT_NODE) {
            return false;
        }
        var e = /**@type{!Element}*/(n),
            localName = e.localName;
        return (/^(span|p|h|a|meta)$/.test(localName)
                && e.namespaceURI === textns)
            || (localName === "span"
                && e.className === "webodf-annotationHighlight");
    }
    this.isGroupingElement = isGroupingElement;

    /**
     * Determine if the node is a character element,
     * namely "s", "tab", or "line-break".
     * @param {?Node} e
     * @return {!boolean}
     */
    function isCharacterElement(e) {
        var n = e && e.localName,
            ns,
            r = false;
        if (n) {
            ns = e.namespaceURI;
            if (ns === textns) {
                r = n === "s" || n === "tab" || n === "line-break";
            }
        }
        return r;
    }
    this.isCharacterElement = isCharacterElement;

    /**
     * Determine if the node is an 'as char' type of element,
     * i.e. any element which behaves like a character with
     * respect to its surrounding positions, such as the
     * space/tab/line-break elements, draw:frames with
     * anchor type being 'as-char', or inline root elements
     * such as annotations.
     * @param {?Node} e
     * @return {!boolean}
     */
    function isAnchoredAsCharacterElement(e) {
        return isCharacterElement(e) || isCharacterFrame(e) || isInlineRoot(e);
    }
    this.isAnchoredAsCharacterElement = isAnchoredAsCharacterElement;

    /**
     * Determine if the node is a <text:s/> character element.
     * @param {?Node} e
     * @return {!boolean}
     */
    function isSpaceElement(e) {
        var n = e && e.localName,
            ns,
            r = false;
        if (n) {
            ns = e.namespaceURI;
            if (ns === textns) {
                r = n === "s";
            }
        }
        return r;
    }
    this.isSpaceElement = isSpaceElement;

    /**
     * Returns true if the given node is an odf node, i.e. its namespace is
     * one of the ODF element namespaces listed in odfNodeNamespaceMap.
     * @param {!Node} node
     * @return {!boolean}
     */
    function isODFNode(node) {
        return odfNodeNamespaceMap.indexOf(node.namespaceURI) !== -1;
    }
    this.isODFNode = isODFNode;

    /**
     * Returns true if the supplied node contains no text-in-ODF, or ODF elements
     * @param {!Node} node
     * @return {!boolean}
     */
    function hasNoODFContent(node) {
        var childNode;
        if (isCharacterElement(node)) {
            return false;
        }
        // a text node directly inside a grouping element counts as content
        // as soon as it holds at least one character
        if (isGroupingElement(/**@type{!Node}*/(node.parentNode)) && node.nodeType === Node.TEXT_NODE) {
            return node.textContent.length === 0;
        }
        childNode = node.firstChild;
        while (childNode) {
            if (isODFNode(childNode) || !hasNoODFContent(childNode)) {
                return false;
            }
            childNode = childNode.nextSibling;
        }
        return true;
    }
    this.hasNoODFContent = hasNoODFContent;

    /**
     * Descend along first children for as long as the current node is a
     * grouping element that has children.
     * @param {!Node} node
     * @return {!Node}
     */
    function firstChild(node) {
        while (node.firstChild !== null && isGroupingElement(node)) {
            node = node.firstChild;
        }
        return node;
    }
    this.firstChild = firstChild;

    /**
     * Descend along last children for as long as the current node is a
     * grouping element that has children.
     * @param {!Node} node
     * @return {!Node}
     */
    function lastChild(node) {
        while (node.lastChild !== null && isGroupingElement(node)) {
            node = node.lastChild;
        }
        return node;
    }
    this.lastChild = lastChild;

    /**
     * Return the node preceding the given node within its paragraph,
     * climbing out of parents that have no earlier sibling and descending
     * into trailing grouping elements. Returns null when the paragraph
     * boundary is reached, or when the node has no paragraph ancestor and
     * the top of the tree is reached (previously a TypeError).
     * @param {!Node} node
     * @return {?Node}
     */
    function previousNode(node) {
        var current = node;
        while (current && !isParagraph(current) && current.previousSibling === null) {
            current = current.parentNode;
        }
        if (!current || isParagraph(current)) {
            return null;
        }
        return lastChild(/**@type{!Node}*/(current.previousSibling));
    }
    this.previousNode = previousNode;

    /**
     * Return the node following the given node within its paragraph,
     * climbing out of parents that have no later sibling and descending
     * into leading grouping elements. Returns null when the paragraph
     * boundary is reached, or when the node has no paragraph ancestor and
     * the top of the tree is reached (previously a TypeError).
     * @param {!Node} node
     * @return {?Node}
     */
    function nextNode(node) {
        var current = node;
        while (current && !isParagraph(current) && current.nextSibling === null) {
            current = current.parentNode;
        }
        if (!current || isParagraph(current)) {
            return null;
        }
        return firstChild(/**@type{!Node}*/(current.nextSibling));
    }
    this.nextNode = nextNode;

    /**
     * Walk to the left along the DOM and return true if the first thing
     * encountered is either a non-whitespace text character or a non-space
     * character element (i.e., any character element other than <text:s/>).
     * Walking goes through grouping elements.
     * @param {?Node} node the first node to scan
     * @return {!boolean}
     */
    function scanLeftForNonSpace(node) {
        var r = false,
            text;
        while (node) {
            if (node.nodeType === Node.TEXT_NODE) {
                text = /**@type{!Text}*/(node);
                if (text.length === 0) {
                    // empty text nodes carry no information, keep walking
                    node = previousNode(text);
                } else {
                    return !isODFWhitespace(
                        text.data.substr(text.length - 1, 1)
                    );
                }
            } else if (isAnchoredAsCharacterElement(node)) {
                r = isSpaceElement(node) === false;
                node = null;
            } else {
                node = previousNode(node);
            }
        }
        return r;
    }
    this.scanLeftForNonSpace = scanLeftForNonSpace;

    /**
     * Walk to the left along the DOM and return the type of the first
     * thing encountered.
     * 0 none of the below
     * 1 non-whitespace character or a character element
     * 2 whitespace character that is preceded by a non-whitespace character
     *   or a character element
     *
     * @param {!Node} node the first node to scan
     * @return {!number}
     */
    function lookLeftForCharacter(node) {
        var text, r = 0, tl = 0;
        if (node.nodeType === Node.TEXT_NODE) {
            tl = /**@type{!Text}*/(node).length;
        }
        if (tl > 0) {
            text = /**@type{!Text}*/(node).data;
            if (!isODFWhitespace(text.substr(tl - 1, 1))) {
                r = 1; // character found
            } else if (tl === 1) {
                // single whitespace: what precedes it lives in an earlier node
                r = scanLeftForNonSpace(previousNode(node)) ? 2 : 0;
            } else {
                r = isODFWhitespace(text.substr(tl - 2, 1)) ? 0 : 2;
            }
        } else if (isAnchoredAsCharacterElement(node)) {
            r = 1;
        }
        return r;
    }
    this.lookLeftForCharacter = lookLeftForCharacter;

    /**
     * Look to the right along the DOM and return true if the first thing
     * encountered is either a non-whitespace character or a character
     * element.
     *
     * @param {?Node} node the first node to scan
     * @return {!boolean}
     */
    function lookRightForCharacter(node) {
        var r = false,
            l = 0;
        if (node && node.nodeType === Node.TEXT_NODE) {
            l = /**@type{!Text}*/(node).length;
        }
        if (l > 0) {
            r = !isODFWhitespace(/**@type{!Text}*/(node).data.substr(0, 1));
        } else if (isAnchoredAsCharacterElement(node)) {
            r = true;
        }
        return r;
    }
    this.lookRightForCharacter = lookRightForCharacter;

    /**
     * Walk to the left along the DOM and return true if either a
     * non-whitespace character or a character element is encountered.
     *
     * @param {?Node} node the first node to scan
     * @return {!boolean}
     */
    function scanLeftForAnyCharacter(node) {
        var r = false, l;
        node = node && lastChild(node);
        while (node) {
            if (node.nodeType === Node.TEXT_NODE) {
                l = /**@type{!Text}*/(node).length;
            } else {
                l = 0;
            }
            if (l > 0 && !isODFWhitespace(/**@type{!Text}*/(node).data)) {
                r = true;
                break;
            }
            if (isAnchoredAsCharacterElement(node)) {
                r = true;
                break;
            }
            node = previousNode(node);
        }
        return r;
    }
    this.scanLeftForAnyCharacter = scanLeftForAnyCharacter;

    /**
     * Walk to the right along the DOM and return true if either a
     * non-whitespace character or a character element is encountered.
     *
     * @param {?Node} node the first node to scan
     * @return {!boolean}
     */
    function scanRightForAnyCharacter(node) {
        var r = false, l;
        node = node && firstChild(node);
        while (node) {
            if (node.nodeType === Node.TEXT_NODE) {
                l = /**@type{!Text}*/(node).length;
            } else {
                l = 0;
            }
            if (l > 0 && !isODFWhitespace(/**@type{!Text}*/(node).data)) {
                r = true;
                break;
            }
            if (isAnchoredAsCharacterElement(node)) {
                r = true;
                break;
            }
            node = nextNode(node);
        }
        return r;
    }
    this.scanRightForAnyCharacter = scanRightForAnyCharacter;

    /**
     * Check if the text from the offset onwards is part of the trailing
     * whitespace: the remainder of the text node must be whitespace only
     * and no character may follow to the right.
     * @param {!Text} textnode
     * @param {!number} offset
     * @return {!boolean}
     */
    function isTrailingWhitespace(textnode, offset) {
        if (!isODFWhitespace(textnode.data.substr(offset))) {
            return false;
        }
        return !scanRightForAnyCharacter(nextNode(textnode));
    }
    this.isTrailingWhitespace = isTrailingWhitespace;

    /**
     * Takes a textNode and an offset, and returns true if the character
     * at that offset is a significant whitespace.
     *
     * Significant whitespace is defined as:
     * - Not part of the leading whitespace block in a paragraph
     * - Not part of the trailing whitespace block in a paragraph
     * - The first whitespace character after a text node or character
     *
     * All other whitespace elements are considered insignificant
     * @param {!Text} textNode
     * @param {!number} offset
     * @return {!boolean}
     */
    function isSignificantWhitespace(textNode, offset) {
        var text = textNode.data,
            followsCharacter;

        if (!isODFWhitespace(text[offset])) {
            // Character is not whitespace
            return false;
        }

        if (isAnchoredAsCharacterElement(textNode.parentNode)) {
            // Parent is a character element, and therefore does not actually contain text
            // This prevents a space element from being upgraded again
            return false;
        }

        if (offset > 0) {
            // First whitespace after a character is significant
            followsCharacter = !isODFWhitespace(text[offset - 1]);
        } else {
            // If the first character found scanning to the left is non-whitespace,
            // this might still be significant
            followsCharacter = scanLeftForNonSpace(previousNode(textNode));
        }

        return followsCharacter && !isTrailingWhitespace(textNode, offset);
    }
    this.isSignificantWhitespace = isSignificantWhitespace;

    /**
     * Returns true if the supplied node is a downgradeable space element.
     * As per http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#element-text_s
     * a downgradeable whitespace element is a space element that is immediately preceded by something other than a space
     * and has at least one non-space character after it
     * @param {!Node} node
     * @return {!boolean}
     */
    this.isDowngradableSpaceElement = function (node) {
        if (isSpaceElement(node)) {
            return scanLeftForNonSpace(previousNode(node)) && scanRightForAnyCharacter(nextNode(node));
        }
        return false;
    };

    /**
     * Returns the length split as value and unit, from an ODF attribute.
     * The first "<number><unit>" occurrence in the string is used; the
     * recognised units are cm, mm, in, pt, pc, px and %.
     * An optional leading minus sign is honoured for every number form,
     * including ones without integer part such as "-.5cm" (which used to
     * be reported as +0.5).
     * @param {?string|undefined} length
     * @return {?{value:!number,unit:!string}} null if nothing matches
     */
    function parseLength(length) {
        var re = /(-?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]*[1-9][0-9]*))(cm|mm|in|pt|pc|px|%)/,
            m = re.exec(length);
        if (!m) {
            return null;
        }
        return {value: parseFloat(m[1]), unit: m[2]};
    }
    this.parseLength = parseLength;

    /**
     * Returns the value and unit of the length, if it is positive ( > 0)
     * and not a percentage.
     * @param {?string|undefined} length
     * @return {?{value:!number,unit:!string}}
     */
    function parsePositiveLength(length) {
        var result = parseLength(length);
        if (result && (result.value <= 0 || result.unit === '%')) {
            return null;
        }
        return result;
    }

    /**
     * Returns the value and unit of the length, if it is non-negative ( >= 0)
     * and not a percentage.
     * @param {?string|undefined} length
     * @return {?{value:!number,unit:!string}}
     */
    function parseNonNegativeLength(length) {
        var result = parseLength(length);
        if (result && (result.value < 0 || result.unit === '%')) {
            return null;
        }
        return result;
    }
    this.parseNonNegativeLength = parseNonNegativeLength;

    /**
     * Returns the value and unit(%) of the length, if it is specified in %age
     * @param {?string|undefined} length
     * @return {?{value:!number,unit:!string}}
     */
    function parsePercentage(length) {
        var result = parseLength(length);
        if (result && (result.unit !== '%')) {
            return null;
        }
        return result;
    }

    /**
     * Returns the value and unit of the font size, in conformance with fo:font-size
     * constraints: either a positive length or a percentage.
     * @param {?string|undefined} fontSize
     * @return {?{value:!number,unit:!string}}
     */
    function parseFoFontSize(fontSize) {
        return parsePositiveLength(fontSize) || parsePercentage(fontSize);
    }
    this.parseFoFontSize = parseFoFontSize;

    /**
     * Returns the value and unit of the line height, in conformance with fo:line-height
     * constraints: either a non-negative length or a percentage.
     * @param {?string|undefined} lineHeight
     * @return {?{value:!number,unit:!string}}
     */
    function parseFoLineHeight(lineHeight) {
        return parseNonNegativeLength(lineHeight) || parsePercentage(lineHeight);
    }
    this.parseFoLineHeight = parseFoLineHeight;

    /**
     * Adapted from instructions on how to generate plain text from an ODT document.
     * See algorithm at http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1415196_253892949
     * @param {!Node} node
     * @return {boolean} false for nodes whose content must be skipped
     */
    function isTextContentContainingNode(node) {
        switch (node.namespaceURI) {
        // Namespace skips
        case odf.Namespaces.drawns:
        case odf.Namespaces.svgns:
        case odf.Namespaces.dr3dns:
            return false;
        case odf.Namespaces.textns:
            // Specific node type skips
            //noinspection FallthroughInSwitchStatementJS
            switch (node.localName) {
            case 'note-body':
            case 'ruby-text':
                return false;
            }
            break;
        case odf.Namespaces.officens:
            // Specific node type skips
            //noinspection FallthroughInSwitchStatementJS
            switch (node.localName) {
            case 'annotation':
            case 'binary-data':
            case 'event-listeners':
                return false;
            }
            break;
        default:
            // Skip webodf edit markers or cursor information
            switch (node.localName) {
            case 'cursor':
            case 'editinfo':
                return false;
            }
            break;
        }
        return true;
    }
    this.isTextContentContainingNode = isTextContentContainingNode;

    /**
     * Returns true if the text node is within a paragraph and contains either non-whitespace characters, or
     * significant whitespace characters (as defined by the ODF standards).
     * WARNING this method is likely to be quite slow, so should be used as little as possible
     * @param {!Text} textNode
     * @return {!boolean}
     */
    function isSignificantTextContent(textNode) {
        return Boolean(getParagraphElement(textNode)
            && (!isODFWhitespace(textNode.textContent) || isSignificantWhitespace(textNode, 0)));
    }

    /**
     * Remove any nodes that aren't fully contained within the supplied range. This function assumes
     * the nodes appear in document order, so only the two ends of the
     * array are trimmed. The array is modified in place.
     * @param {!Range} range
     * @param {!Array.<!Node>} nodes
     * @return {undefined}
     */
    function removePartiallyContainedNodes(range, nodes) {
        while (nodes.length > 0 && !domUtils.rangeContainsNode(range, /**@type{!Node}*/(nodes[0]))) {
            nodes.shift();
        }
        while (nodes.length > 0 && !domUtils.rangeContainsNode(range, /**@type{!Node}*/(nodes[nodes.length - 1]))) {
            nodes.pop();
        }
    }

    /**
     * Returns a array of text nodes considered to be part of the supplied range.
     * This will exclude elements that are not part of the ODT main text body,
     * as well as insignificant whitespace text nodes.
     * @param {!Range} range    Range to search for nodes within
     * @param {boolean} includePartial Include partially intersecting text nodes
     *                                 in the result.
     * @return {!Array.<!Node>}
     */
    function getTextNodes(range, includePartial) {
        var textNodes;

        /**
         * Accept significant text nodes, skip over (but descend into)
         * elements that may hold text content, reject everything else.
         * @param {!Node} node
         * @return {number}
         */
        function nodeFilter(node) {
            var result = NodeFilter.FILTER_REJECT;
            if (node.nodeType === Node.TEXT_NODE) {
                if (isSignificantTextContent(/**@type{!Text}*/(node))) {
                    result = NodeFilter.FILTER_ACCEPT;
                }
            } else if (isTextContentContainingNode(node)) {
                result = NodeFilter.FILTER_SKIP;
            }
            return result;
        }

        /*jslint bitwise:true*/
        textNodes = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT);
        /*jslint bitwise:false*/
        if (!includePartial) {
            removePartiallyContainedNodes(range, textNodes);
        }

        return textNodes;
    }
    this.getTextNodes = getTextNodes;

    /**
     * Get all character elements and text nodes fully contained within the
     * supplied range in document order.
     *
     * For example, given the following fragment, with the range starting at b,
     * and ending at c:
     *      <text:p>ab<text:s/>cd</text:p>
     * this function would return the following array:
     *      ["b", text:s, "c"]
     * @param {!Range} range
     * @param {!boolean} includePartial Include partially intersecting text &
     *                         character nodes in the result.
     * @param {!boolean} includeInsignificantWhitespace Include whitespace only
     *                         nodes that are not considered significant text
     *                         content. This includes whitespace only elements
     *                         used in pretty-formatted xml as LibreOffice
     *                         produces in flat ODT files.
     * @return {!Array.<!Element|!Text>}
     */
    function getTextElements(range, includePartial, includeInsignificantWhitespace) {
        var elements;

        /**
         * @param {!Node} node
         * @return {number}
         */
        function nodeFilter(node) {
            var result = NodeFilter.FILTER_REJECT;
            // do not return anything inside an character element or an inline root such as an annotation
            if (isCharacterElement(node.parentNode) || isInlineRoot(node)) {
                result = NodeFilter.FILTER_REJECT;
            } else if (node.nodeType === Node.TEXT_NODE) {
                if (includeInsignificantWhitespace || isSignificantTextContent(/**@type{!Text}*/(node))) {
                    // Text nodes should only be returned if they are
                    // fully contained within the range.
                    result = NodeFilter.FILTER_ACCEPT;
                }
            } else if (isAnchoredAsCharacterElement(node)) {
                // Character elements should only be returned if they are
                // fully contained within the range.
                result = NodeFilter.FILTER_ACCEPT;
            } else if (isTextContentContainingNode(node) || isGroupingElement(node)) {
                result = NodeFilter.FILTER_SKIP;
            }
            return result;
        }

        /*jslint bitwise:true*/
        elements = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT);
        /*jslint bitwise:false*/
        if (!includePartial) {
            removePartiallyContainedNodes(range, elements);
        }

        return elements;
    }
    this.getTextElements = getTextElements;

    /**
     * Crawl parent nodes starting at the startContainer until a matching node is found,
     * or the first inline root is met. If a node is accepted by the supplied filter, and is
     * not already the first element in the elements array, this node is prepended to the start
     * of the elements array.
     *
     * @param {!Node} startContainer Container to start search from (inclusive)
     * @param {!Array.<!Node>} elements Existing elements already discovered
     * @param {!function(!Node):!boolean} filter
     */
    function prependParentContainers(startContainer, elements, filter) {
        var container = startContainer;
        while (container) {
            if (filter(container)) {
                if (elements[0] !== container) {
                    elements.unshift(container);
                }
                break;
            }
            if (isInlineRoot(container)) {
                break;
            }
            container = container.parentNode;
        }
    }

    /**
     * Get all paragraph elements that intersect the supplied range in document
     * order.
     *
     * For example, given the following fragment, with the range starting at b,
     * and ending at c:
     *      <text:p id="A">ab</text:p><text:p id="B"><text:s/>cd</text:p>
     * this function would return the following array:
     *      [text:p{id="A"}, text:p{id="B"}]
     * @param {!Range} range
     * @return {!Array.<!Element>}
     */
    this.getParagraphElements = function (range) {
        var elements;
        /**
         * @param {!Node} node
         * @return {number}
         */
        function nodeFilter(node) {
            var result = NodeFilter.FILTER_REJECT;
            if (isParagraph(node)) {
                result = NodeFilter.FILTER_ACCEPT;
            } else if (isTextContentContainingNode(node) || isGroupingElement(node)) {
                result = NodeFilter.FILTER_SKIP;
            }
            return result;
        }

        elements = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT);
        // getNodesInRange will only return nodes it enters during the iteration.
        // However, we desire all paragraph nodes either contained OR containing this range,
        // so we crawl the parentNodes of the start container until a root is found.

        // Note, this isn't necessary for the end container because iteration crosses the
        // node boundary when entering towards the end container, meaning all paragraphs in
        // the end container's parentNodes will be reported by getNodesInRange.
        prependParentContainers(/**@type{!Node}*/(range.startContainer), elements, isParagraph);
        return elements;
    };

    /**
     * Get all image elements that are fully contained within the supplied
     * range in document order, plus an image that contains the start of
     * the range, if any.
     * @param {!Range} range
     * @return {!Array.<Node>}
     */
    this.getImageElements = function (range) {
        var elements;

        /**
         * @param {!Node} node
         * @return {number}
         */
        function nodeFilter(node) {
            var result = NodeFilter.FILTER_SKIP;
            if (isImage(node)) {
                result = NodeFilter.FILTER_ACCEPT;
            }
            return result;
        }

        elements = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT);
        // See description in getParagraphElements as to why this is necessary
        // Short summary: want to include images that completely contain this range
        prependParentContainers(/**@type{!Node}*/(range.startContainer), elements, isImage);
        return elements;
    };

    /**
     * Returns the deepest first descendant of the node to the right of the
     * given point: the child at index offset + 1 if the container has one,
     * otherwise the next sibling of the container or of its closest
     * ancestor that has one.
     * Returns null if there is nothing to the right of the point anywhere
     * in the tree (previously a TypeError).
     * NOTE(review): the child at offset + 1 (not at offset) is chosen;
     * presumably the child at offset is expected to be a cursor or similar
     * marker - confirm against callers before changing.
     * @param {!Node} container
     * @param {!number} offset
     * @return {?Node}
     */
    function getRightNode(container, offset) {
        var node = container;
        if (offset < node.childNodes.length - 1) {
            node = /** @type {!Node} */(node.childNodes[offset + 1]);
        } else {
            while (node && !node.nextSibling) {
                node = node.parentNode;
            }
            if (!node) {
                return null;
            }
            node = node.nextSibling;
        }
        while (node.firstChild) {
            node = node.firstChild;
        }
        return node;
    }

    /**
     * Get all hyperlink elements that intersect the supplied range in document order
     *
     * For example, given the following fragment, with the range starting at b, and ending at c:
     *      <text:a xlink:href="google">ab</text:a><text:a xlink:href="apple">cd</text:a>
     * this function would return the following array:
     *      [text:a{xlink:href="google"}, text:a{xlink:href="apple"}]
     * @param {!Range} range
     * @return {!Array.<Node>}
     */
    this.getHyperlinkElements = function (range) {
        var links = [],
            newRange = /** @type {!Range}*/(range.cloneRange()),
            node,
            textNodes;

        if (range.collapsed && range.endContainer.nodeType === Node.ELEMENT_NODE) {
            // extend the collapsed range over the start of the text to its
            // right, so that a hyperlink beginning there is picked up
            node = getRightNode(range.endContainer, range.endOffset);
            if (node && node.nodeType === Node.TEXT_NODE) {
                // never address beyond the end of an empty text node
                newRange.setEnd(node, Math.min(1, /**@type{!Text}*/(node).length));
            }
        }

        textNodes = getTextElements(newRange, true, false);
        textNodes.forEach(function (textElement) {
            var parent = textElement.parentNode;
            // stop at the paragraph, or at the top of the tree for
            // elements that are not inside any paragraph
            while (parent && !isParagraph(parent)) {
                if (isHyperlink(parent) && links.indexOf(parent) === -1) {
                    links.push(parent);
                    break;
                }
                parent = parent.parentNode;
            }
        });
        newRange.detach();
        return links;
    };

    /**
     * Normalize the font-family name as defined in
     * http://www.w3.org/TR/2008/REC-CSS2-20080411/fonts.html#propdef-font-family
     * (see there text behind: "There are two types of font family names: <family-name>")
     * @param {!string} fontFamilyName
     * @return {!string}
     */
    /*jslint regexp: true*/
    this.getNormalizedFontFamilyName = function (fontFamilyName) {
        // not quoted with either single- or double-quotes?
        // (\n & \r are syntactically okay as whitespaces, so need to be accepted as well)
        //     ^(["'])        -> match either " or ' at begin (and store match)
        //     (?:.|[\n\r])*? -> match non-greedy any number of any char or \r and \n
        //     \1$            -> match content of first match at end
        if (!(/^(["'])(?:.|[\n\r])*?\1$/).test(fontFamilyName)) {
            // remove any whitespaces at begin and end of full name (ignore internal yet)
            //     ^[ \t\r\n\f]*    -> match whitespace at begin
            //     ((?:.|[\n\r])*?) -> match non-greedy any number of any char or \r and \n (and store match)
            //     [ \t\r\n\f]*$    -> match whitespace at end
            fontFamilyName = fontFamilyName.replace(/^[ \t\r\n\f]*((?:.|[\n\r])*?)[ \t\r\n\f]*$/, "$1");
            // if there is any internal whitespace, reduce it to just one normal whitespace per group
            // and add quotes around the full name
            // (quotes should be only added if there is whitespace inside, as the passed fontFamilyName could
            // be a generic-family one, which must not be quoted)
            if ((/[ \t\r\n\f]/).test(fontFamilyName)) {
                fontFamilyName = "'" + fontFamilyName.replace(/[ \t\r\n\f]+/g, " ") + "'";
            }
        }
        return fontFamilyName;
    };
    /*jslint regexp: false*/
};