/**
 * Copyright (C) 2012-2013 KO GmbH <copyright@kogmbh.com>
 *
 * @licstart
 * This file is part of WebODF.
 *
 * WebODF is free software: you can redistribute it and/or modify it
 * under the terms of the GNU Affero General Public License (GNU AGPL)
 * as published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * WebODF is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with WebODF.  If not, see <http://www.gnu.org/licenses/>.
 * @licend
 *
 * @source: http://www.webodf.org/
 * @source: https://github.com/kogmbh/WebODF/
 */

/*global Node, runtime, odf, NodeFilter, core*/

/**
 * Collection of helpers for inspecting and walking ODF DOM trees:
 * element type predicates, whitespace significance checks, length
 * parsing, and range-based element collection.
 * A shared instance is published as odf.OdfUtils at the end of this file.
 * @constructor
 */
odf.OdfUtilsImpl = function OdfUtilsImpl() {
    "use strict";

    var /**@const
           @type{!string}*/
        textns = odf.Namespaces.textns,
        /**@const
           @type{!string}*/
        drawns = odf.Namespaces.drawns,
        /**@const
           @type{!string}*/
        xlinkns = odf.Namespaces.xlinkns,
        /**@const
           @type{!string}*/
        officens = odf.Namespaces.officens,
        domUtils = core.DomUtils,
        // only add odf element namespaces here.
        // Namespaces solely used for attributes are excluded. eg. fo, xlink & xml
        odfNodeNamespaceMap = [
            odf.Namespaces.dbns,
            odf.Namespaces.dcns,
            odf.Namespaces.dr3dns,
            odf.Namespaces.drawns,
            odf.Namespaces.chartns,
            odf.Namespaces.formns,
            odf.Namespaces.numberns,
            odf.Namespaces.officens,
            odf.Namespaces.presentationns,
            odf.Namespaces.stylens,
            odf.Namespaces.svgns,
            odf.Namespaces.tablens,
            odf.Namespaces.textns
        ],
        odfSchema = odf.OdfSchema;

    /**
     * Determine if the node is a draw:image element.
     * @param {?Node} e
     * @return {!boolean}
     */
    function isImage(e) {
        var name = e && e.localName;
        return name === "image" && e.namespaceURI === drawns;
    }
    this.isImage = isImage;

    /**
     * Determine if the node is a draw:frame element and has its
     * text:anchor-type attribute set to 'as-char'.
     * @param {?Node} e
     * @return {!boolean}
     */
    function isCharacterFrame(e) {
        // TODO the anchor-type can be defined on any style associated with the frame
        return e !== null && e.nodeType === Node.ELEMENT_NODE
            && e.localName === "frame" && e.namespaceURI === drawns
            && /**@type{!Element}*/(e).getAttributeNS(textns, "anchor-type")
                === "as-char";
    }
    this.isCharacterFrame = isCharacterFrame;

    /**
     * Determine if the node is an office:annotation element.
     * @param {?Node} e
     * @return {!boolean}
     */
    function isAnnotation(e) {
        var name = e && e.localName;
        return name === "annotation" && e.namespaceURI === officens;
    }

    /**
     * Determine if the node is an annotation wrapper element, i.e. a
     * div carrying exactly the class name "annotationWrapper".
     * @param {?Node} e
     * @return {!boolean}
     */
    function isAnnotationWrapper(e) {
        var name = e && e.localName;
        return name === "div" && /**@type{!HTMLDivElement}*/(e).className === "annotationWrapper";
    }

    /**
     * Determine if the node is an inline 'root' type,
     * i.e. an office:annotation or a wrapper for an annotation.
     * @param {?Node} e
     * @return {!boolean}
     */
    function isInlineRoot(e) {
        return isAnnotation(e)
            || isAnnotationWrapper(e);
    }
    this.isInlineRoot = isInlineRoot;

    /**
     * Determine if the node is a text:span element.
     * @param {?Node} e
     * @return {!boolean}
     */
    this.isTextSpan = function (e) {
        var name = e && e.localName;
        return name === "span" && e.namespaceURI === textns;
    };

    /**
     * Determine if the node is a text:a element.
     * @param {?Node} node
     * @return {!boolean}
     */
    function isHyperlink(node) {
        var name = node && node.localName;
        return name === "a" && node.namespaceURI === textns;
    }
    this.isHyperlink = isHyperlink;

    /**
     * Gets the href attribute of text:a element.
     * Returns an empty string if the attribute is missing or empty.
     * @param {!Element} element
     * @return {!string}
     */
    this.getHyperlinkTarget = function (element) {
        return element.getAttributeNS(xlinkns, 'href') || "";
    };

    /**
     * Determine if the node is a text:p or a text:h element.
     * @param {?Node} e
     * @return {!boolean}
     */
    function isParagraph(e) {
        var name = e && e.localName;
        return (name === "p" || name === "h") && e.namespaceURI === textns;
    }
    this.isParagraph = isParagraph;

    /**
     * Find the paragraph containing the specified node. If an offset is provided and
     * the node has a child at the specified offset, this will be included in the search
     * as well if the supplied node is not a paragraph itself.
     * @param {?Node} node
     * @param {!number=} offset
     * @return {?Element} The paragraph, or null if no ancestor-or-self is one
     */
    function getParagraphElement(node, offset) {
        if (node && offset !== undefined && !isParagraph(node) && node.childNodes.item(offset)) {
            node = node.childNodes.item(offset);
        }
        while (node && !isParagraph(node)) {
            node = node.parentNode;
        }
        return /**@type{?Element}*/(node);
    }
    this.getParagraphElement = getParagraphElement;

    /**
     * Find the closest office:annotation that is the node itself or one of
     * its ancestors. The container itself is never tested.
     * @param {?Node} node Node to start searching with
     * @param {!Element} container Root container to stop searching at.
     * @return {?Element}
     */
    function getParentAnnotation(node, container) {
        while (node && node !== container) {
            if (isAnnotation(node)) {
                return /**@type{!Element}*/(node);
            }
            node = node.parentNode;
        }
        return null;
    }
    this.getParentAnnotation = getParentAnnotation;

    /**
     * @param {?Node} node Node to start searching with
     * @param {!Element} container Root container to stop searching at.
     * @return {!boolean}
     */
    this.isWithinAnnotation = function (node, container) {
        return Boolean(getParentAnnotation(node, container));
    };

    /**
     * Gets the creator of an annotation, taken from the text content of
     * the first dc:creator descendant.
     * @param {!Element} annotationElement
     * @return {!string} The creator, or an empty string if the annotation
     *                   has no dc:creator element
     */
    this.getAnnotationCreator = function (annotationElement) {
        var creatorElement = annotationElement.getElementsByTagNameNS(odf.Namespaces.dcns, "creator")[0];
        // An annotation without a dc:creator must not crash the caller
        return creatorElement ? /**@type{!Element}*/(creatorElement).textContent : "";
    };

    /**
     * Determine if the node is a text:list-item element.
     * @param {?Node} e
     * @return {!boolean}
     */
    this.isListItem = function (e) {
        var name = e && e.localName;
        return name === "list-item" && e.namespaceURI === textns;
    };

    /**
     * Determine if the node is a text:line-break element.
     * @param {?Node} e
     * @return {!boolean}
     */
    this.isLineBreak = function (e) {
        var name = e && e.localName;
        return name === "line-break" && e.namespaceURI === textns;
    };

    /**
     * Determine if the text consists entirely of whitespace characters
     * (space, tab, carriage return, line feed).
     * At least one whitespace is required.
     * @param {!string} text
     * @return {!boolean}
     */
    function isODFWhitespace(text) {
        return (/^[ \t\r\n]+$/).test(text);
    }
    this.isODFWhitespace = isODFWhitespace;

    /**
     * Determine if the node is a grouping element, i.e. an element the
     * schema reports as a text container, or a span carrying exactly the
     * class name "webodf-annotationHighlight".
     * @param {?Node} n
     * @return {!boolean}
     */
    function isGroupingElement(n) {
        if (n === null || n.nodeType !== Node.ELEMENT_NODE) {
            return false;
        }
        var e = /**@type{!Element}*/(n),
            localName = e.localName;
        return odfSchema.isTextContainer(e.namespaceURI, localName)
            || (localName === "span" && e.className === "webodf-annotationHighlight");
    }
    this.isGroupingElement = isGroupingElement;

    /**
     * Determine if the node is an element the schema reports as a field.
     * @param {?Node} n
     * @return {!boolean}
     */
    function isFieldElement(n) {
        if (n === null || n.nodeType !== Node.ELEMENT_NODE) {
            return false;
        }
        var e = /**@type{!Element}*/(n),
            localName = e.localName;
        return odfSchema.isField(e.namespaceURI, localName);
    }
    this.isFieldElement = isFieldElement;

    /**
     * Determine if the node is a character element,
     * namely "s", "tab", or "line-break".
     * @param {?Node} e
     * @return {!boolean}
     */
    function isCharacterElement(e) {
        var name = e && e.localName;
        return (name === "s" || name === "tab" || name === "line-break")
            && e.namespaceURI === textns;
    }
    this.isCharacterElement = isCharacterElement;

    /**
     * Determine if the node is an 'as char' type of element,
     * i.e. any element which behaves like a character with
     * respect to its surrounding positions, such as the
     * space/tab/line-break elements, fields, draw:frames with
     * anchor type being 'as-char', or inline root elements
     * such as annotations.
     * @param {?Node} e
     * @return {!boolean}
     */
    function isAnchoredAsCharacterElement(e) {
        return isCharacterElement(e) || isFieldElement(e) || isCharacterFrame(e) || isInlineRoot(e);
    }
    this.isAnchoredAsCharacterElement = isAnchoredAsCharacterElement;

    /**
     * Determine if the node is a <text:s/> character element.
     * @param {?Node} e
     * @return {!boolean}
     */
    function isSpaceElement(e) {
        var name = e && e.localName;
        return name === "s" && e.namespaceURI === textns;
    }
    this.isSpaceElement = isSpaceElement;

    /**
     * Returns true if the given node is an odf node, i.e. its namespace
     * is one of the ODF element namespaces.
     * @param {!Node} node
     * @return {!boolean}
     */
    function isODFNode(node) {
        return odfNodeNamespaceMap.indexOf(node.namespaceURI) !== -1;
    }
    this.isODFNode = isODFNode;

    /**
     * Returns true if the supplied node contains no text-in-ODF, or ODF elements
     * @param {!Node} node
     * @return {!boolean}
     */
    function hasNoODFContent(node) {
        var childNode;
        if (isCharacterElement(node) || isFieldElement(node)) {
            return false;
        }
        // Text directly inside a grouping element counts as ODF text content
        if (isGroupingElement(/**@type{!Node}*/(node.parentNode)) && node.nodeType === Node.TEXT_NODE) {
            return node.textContent.length === 0;
        }
        childNode = node.firstChild;
        while (childNode) {
            if (isODFNode(childNode) || !hasNoODFContent(childNode)) {
                return false;
            }
            childNode = childNode.nextSibling;
        }
        return true;
    }
    this.hasNoODFContent = hasNoODFContent;

    /**
     * Descend along first children for as long as the current node is a
     * grouping element with children.
     * @param {!Node} node
     * @return {!Node}
     */
    function firstChild(node) {
        while (node.firstChild !== null && isGroupingElement(node)) {
            node = node.firstChild;
        }
        return node;
    }
    this.firstChild = firstChild;

    /**
     * Descend along last children for as long as the current node is a
     * grouping element with children.
     * @param {!Node} node
     * @return {!Node}
     */
    function lastChild(node) {
        while (node.lastChild !== null && isGroupingElement(node)) {
            node = node.lastChild;
        }
        return node;
    }
    this.lastChild = lastChild;

    /**
     * Get the node preceding the supplied node within its paragraph,
     * climbing out of exhausted parents and descending into grouping
     * elements.
     * @param {!Node} node
     * @return {?Node} null once the paragraph boundary is reached, or if
     *                 the node is not inside a paragraph at all
     */
    function previousNode(node) {
        var current = node;
        while (current && !isParagraph(current) && current.previousSibling === null) {
            current = current.parentNode;
        }
        // current is null when the walk left the tree without meeting a paragraph
        if (!current || isParagraph(current)) {
            return null;
        }
        return lastChild(/**@type{!Node}*/(current.previousSibling));
    }
    this.previousNode = previousNode;

    /**
     * Get the node following the supplied node within its paragraph,
     * climbing out of exhausted parents and descending into grouping
     * elements.
     * @param {!Node} node
     * @return {?Node} null once the paragraph boundary is reached, or if
     *                 the node is not inside a paragraph at all
     */
    function nextNode(node) {
        var current = node;
        while (current && !isParagraph(current) && current.nextSibling === null) {
            current = current.parentNode;
        }
        // current is null when the walk left the tree without meeting a paragraph
        if (!current || isParagraph(current)) {
            return null;
        }
        return firstChild(/**@type{!Node}*/(current.nextSibling));
    }
    this.nextNode = nextNode;

    /**
     * Walk to the left along the DOM and return true if the first thing
     * encountered is either a non-whitespace text character or a non-space
     * character element (i.e., any character element other than <text:s/>).
     * Walking goes through grouping elements.
     * @param {?Node} node the first node to scan
     * @return {!boolean}
     */
    function scanLeftForNonSpace(node) {
        var r = false,
            text;
        while (node) {
            if (node.nodeType === Node.TEXT_NODE) {
                text = /**@type{!Text}*/(node);
                if (text.length === 0) {
                    // empty text nodes carry no information, keep walking
                    node = previousNode(text);
                } else {
                    return !isODFWhitespace(
                        text.data.substr(text.length - 1, 1)
                    );
                }
            } else if (isAnchoredAsCharacterElement(node)) {
                r = isSpaceElement(node) === false;
                node = null;
            } else {
                node = previousNode(node);
            }
        }
        return r;
    }
    this.scanLeftForNonSpace = scanLeftForNonSpace;

    /**
     * Walk to the left along the DOM and return the type of the first
     * thing encountered.
     * 0 none of the below
     * 1 non-whitespace character or a character element
     * 2 whitespace character that is preceded by a non-whitespace character
     *   or a character element
     *
     * @param {!Node} node the first node to scan
     * @return {!number}
     */
    function lookLeftForCharacter(node) {
        var text, r = 0, tl = 0;
        if (node.nodeType === Node.TEXT_NODE) {
            tl = /**@type{!Text}*/(node).length;
        }
        if (tl > 0) {
            text = /**@type{!Text}*/(node).data;
            if (!isODFWhitespace(text.substr(tl - 1, 1))) {
                r = 1; // character found
            } else if (tl === 1) {
                // single whitespace: what precedes it lives in an earlier node
                r = scanLeftForNonSpace(previousNode(node)) ? 2 : 0;
            } else {
                r = isODFWhitespace(text.substr(tl - 2, 1)) ? 0 : 2;
            }
        } else if (isAnchoredAsCharacterElement(node)) {
            r = 1;
        }
        return r;
    }
    this.lookLeftForCharacter = lookLeftForCharacter;

    /**
     * Look to the right along the DOM and return true if the first thing
     * encountered is either a non-whitespace character or a character
     * element.
     *
     * @param {?Node} node the first node to scan
     * @return {!boolean}
     */
    function lookRightForCharacter(node) {
        var r = false,
            l = 0;
        if (node && node.nodeType === Node.TEXT_NODE) {
            l = /**@type{!Text}*/(node).length;
        }
        if (l > 0) {
            r = !isODFWhitespace(/**@type{!Text}*/(node).data.substr(0, 1));
        } else if (isAnchoredAsCharacterElement(node)) {
            r = true;
        }
        return r;
    }
    this.lookRightForCharacter = lookRightForCharacter;

    /**
     * Returns true if the node is a non-empty text node that is not
     * whitespace-only, or an element anchored as a character.
     * @param {!Node} node
     * @return {!boolean}
     */
    function isCharacterContent(node) {
        var text;
        if (node.nodeType === Node.TEXT_NODE) {
            text = /**@type{!Text}*/(node);
            // the length check is needed as isODFWhitespace("") is false
            if (text.length > 0 && !isODFWhitespace(text.data)) {
                return true;
            }
        }
        return isAnchoredAsCharacterElement(node);
    }

    /**
     * Walk to the left along the DOM and return true if either a
     * non-whitespace character or a character element is encountered.
     *
     * @param {?Node} node the first node to scan
     * @return {!boolean}
     */
    function scanLeftForAnyCharacter(node) {
        node = node && lastChild(node);
        while (node) {
            if (isCharacterContent(node)) {
                return true;
            }
            node = previousNode(node);
        }
        return false;
    }
    this.scanLeftForAnyCharacter = scanLeftForAnyCharacter;

    /**
     * Walk to the right along the DOM and return true if either a
     * non-whitespace character or a character element is encountered.
     *
     * @param {?Node} node the first node to scan
     * @return {!boolean}
     */
    function scanRightForAnyCharacter(node) {
        node = node && firstChild(node);
        while (node) {
            if (isCharacterContent(node)) {
                return true;
            }
            node = nextNode(node);
        }
        return false;
    }
    this.scanRightForAnyCharacter = scanRightForAnyCharacter;

    /**
     * check if the node is part of the trailing whitespace
     * @param {!Text} textnode
     * @param {!number} offset
     * @return {!boolean}
     */
    function isTrailingWhitespace(textnode, offset) {
        if (!isODFWhitespace(textnode.data.substr(offset))) {
            return false;
        }
        return !scanRightForAnyCharacter(nextNode(textnode));
    }
    this.isTrailingWhitespace = isTrailingWhitespace;

    /**
     * Takes a textNode and an offset, and returns true if the character
     * at that offset is a significant whitespace.
     *
     * Significant whitespace is defined as:
     * - Not part of the leading whitespace block in a paragraph
     * - Not part of the trailing whitespace block in a paragraph
     * - The first whitespace character after a text node or character
     *
     * All other whitespace elements are considered insignificant
     * @param {!Text} textNode
     * @param {!number} offset
     * @return {!boolean}
     */
    function isSignificantWhitespace(textNode, offset) {
        var text = textNode.data,
            followsCharacter;

        if (!isODFWhitespace(text[offset])) {
            // Character is not whitespace
            return false;
        }

        if (isAnchoredAsCharacterElement(textNode.parentNode)) {
            // Parent is a character element, and therefore does not actually contain text
            // This prevents a space element from being upgraded again
            return false;
        }

        if (offset > 0) {
            // First whitespace after a character is significant
            followsCharacter = !isODFWhitespace(text[offset - 1]);
        } else {
            // If the first character found scanning to the left is non-whitespace,
            // this might still be significant
            followsCharacter = scanLeftForNonSpace(previousNode(textNode));
        }

        return followsCharacter && !isTrailingWhitespace(textNode, offset);
    }
    this.isSignificantWhitespace = isSignificantWhitespace;

    /**
     * Returns true if the supplied node is a downgradeable space element.
     * As per http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#element-text_s
     * a downgradeable whitespace element is a space element that is immediately preceded by something other than a space
     * and has at least one non-space character after it
     * @param {!Node} node
     * @return {!boolean}
     */
    this.isDowngradableSpaceElement = function (node) {
        if (isSpaceElement(node)) {
            return scanLeftForNonSpace(previousNode(node)) && scanRightForAnyCharacter(nextNode(node));
        }
        return false;
    };

    /**
     * Returns the length split as value and unit, from an ODF attribute.
     * The pattern is not anchored, so the first number+unit found anywhere
     * in the string is used.
     * @param {?string|undefined} length
     * @return {?{value:!number,unit:!string}} null if no length is found
     */
    function parseLength(length) {
        var re = /(-?[0-9]*[0-9][0-9]*(\.[0-9]*)?|0+\.[0-9]*[1-9][0-9]*|\.[0-9]*[1-9][0-9]*)((cm)|(mm)|(in)|(pt)|(pc)|(px)|(%))/,
            m = re.exec(length);
        if (!m) {
            return null;
        }
        return {value: parseFloat(m[1]), unit: m[3]};
    }
    this.parseLength = parseLength;

    /**
     * Returns the value and unit of the length, if it is positive ( > 0)
     * and not a percentage
     * @param {?string|undefined} length
     * @return {?{value:!number,unit:!string}}
     */
    function parsePositiveLength(length) {
        var result = parseLength(length);
        if (result && (result.value <= 0 || result.unit === '%')) {
            return null;
        }
        return result;
    }

    /**
     * Returns the value and unit of the length, if it is non-negative ( >= 0)
     * and not a percentage
     * @param {?string|undefined} length
     * @return {?{value:!number,unit:!string}}
     */
    function parseNonNegativeLength(length) {
        var result = parseLength(length);
        if (result && (result.value < 0 || result.unit === '%')) {
            return null;
        }
        return result;
    }
    this.parseNonNegativeLength = parseNonNegativeLength;

    /**
     * Returns the value and unit(%) of the length, if it is specified in %age
     * @param {?string|undefined} length
     * @return {?{value:!number,unit:!string}}
     */
    function parsePercentage(length) {
        var result = parseLength(length);
        if (result && (result.unit !== '%')) {
            return null;
        }
        return result;
    }

    /**
     * Returns the value and unit of the font size, in conformance with fo:font-size
     * constraints
     * @param {?string|undefined} fontSize
     * @return {?{value:!number,unit:!string}}
     */
    function parseFoFontSize(fontSize) {
        return parsePositiveLength(fontSize) || parsePercentage(fontSize);
    }
    this.parseFoFontSize = parseFoFontSize;

    /**
     * Returns the value and unit of the line height, in conformance with fo:line-height
     * constraints
     * @param {?string|undefined} lineHeight
     * @return {?{value:!number,unit:!string}}
     */
    function parseFoLineHeight(lineHeight) {
        return parseNonNegativeLength(lineHeight) || parsePercentage(lineHeight);
    }
    this.parseFoLineHeight = parseFoLineHeight;

    /**
     * Adapted from instructions on how to generate plain text from an ODT document.
     * See algorithm at http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1415196_253892949
     * @param {!Node} node
     * @return {boolean} false if the node's content must be skipped
     */
    function isTextContentContainingNode(node) {
        switch (node.namespaceURI) {
        // Namespace skips
        case odf.Namespaces.drawns:
        case odf.Namespaces.svgns:
        case odf.Namespaces.dr3dns:
            return false;
        case odf.Namespaces.textns:
            // Specific node type skips
            //noinspection FallthroughInSwitchStatementJS
            switch (node.localName) {
            case 'note-body':
            case 'ruby-text':
                return false;
            }
            break;
        case odf.Namespaces.officens:
            // Specific node type skips
            //noinspection FallthroughInSwitchStatementJS
            switch (node.localName) {
            case 'annotation':
            case 'binary-data':
            case 'event-listeners':
                return false;
            }
            break;
        default:
            // Skip webodf edit markers or cursor information
            switch (node.localName) {
            case 'cursor':
            case 'editinfo':
                return false;
            }
            break;
        }
        return true;
    }
    this.isTextContentContainingNode = isTextContentContainingNode;

    /**
     * Returns true if the text node is within a paragraph and contains either non-whitespace characters, or
     * significant whitespace characters (as defined by the ODF standards).
     * WARNING this method is likely to be quite slow, so should be used as little as possible
     * @param {!Text} textNode
     * @return {!boolean}
     */
    function isSignificantTextContent(textNode) {
        return Boolean(getParagraphElement(textNode)
            && (!isODFWhitespace(textNode.textContent) || isSignificantWhitespace(textNode, 0)));
    }

    /**
     * Remove any nodes that aren't fully contained within the supplied range. This function assumes
     * the nodes appear in document order, so only the two ends of the array are trimmed.
     * The array is modified in place.
     * @param {!Range} range
     * @param {!Array.<!Node>} nodes
     * @return {undefined}
     */
    function removePartiallyContainedNodes(range, nodes) {
        while (nodes.length > 0 && !domUtils.rangeContainsNode(range, /**@type{!Node}*/(nodes[0]))) {
            nodes.shift();
        }
        while (nodes.length > 0 && !domUtils.rangeContainsNode(range, /**@type{!Node}*/(nodes[nodes.length - 1]))) {
            nodes.pop();
        }
    }

    /**
     * Returns an array of text nodes considered to be part of the supplied range.
     * This will exclude elements that are not part of the ODT main text body,
     * as well as insignificant whitespace text nodes.
     * @param {!Range} range    Range to search for nodes within
     * @param {boolean} includePartial Include partially intersecting text nodes
     *                                 in the result.
     * @return {!Array.<!Node>}
     */
    function getTextNodes(range, includePartial) {
        var textNodes;

        /**
         * @param {!Node} node
         * @return {number}
         */
        function nodeFilter(node) {
            var result = NodeFilter.FILTER_REJECT;
            if (node.nodeType === Node.TEXT_NODE) {
                if (isSignificantTextContent(/**@type{!Text}*/(node))) {
                    result = NodeFilter.FILTER_ACCEPT;
                }
            } else if (isTextContentContainingNode(node)) {
                result = NodeFilter.FILTER_SKIP;
            }
            return result;
        }

        /*jslint bitwise:true*/
        textNodes = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT);
        /*jslint bitwise:false*/
        if (!includePartial) {
            removePartiallyContainedNodes(range, textNodes);
        }

        return textNodes;
    }
    this.getTextNodes = getTextNodes;

    /**
     * Get all character elements and text nodes fully contained within the
     * supplied range in document order.
     *
     * For example, given the following fragment, with the range starting at b,
     * and ending at c:
     *      <text:p>ab<text:s/>cd</text:p>
     * this function would return the following array:
     *      ["b", text:s, "c"]
     * @param {!Range} range
     * @param {!boolean} includePartial Include partially intersecting text &
     *                         character nodes in the result.
     * @param {!boolean} includeInsignificantWhitespace Include whitespace only
     *                         nodes that are not considered significant text
     *                         content. This includes whitespace only elements
     *                         used in pretty-formatted xml as LibreOffice
     *                         produces in flat ODT files.
     * @return {!Array.<!Element|!Text>}
     */
    function getTextElements(range, includePartial, includeInsignificantWhitespace) {
        var elements;

        /**
         * @param {!Node} node
         * @return {number}
         */
        function nodeFilter(node) {
            var result = NodeFilter.FILTER_REJECT;
            // do not return anything inside a character element or an inline root such as an annotation
            if (isCharacterElement(node.parentNode) || isFieldElement(node.parentNode) || isInlineRoot(node)) {
                result = NodeFilter.FILTER_REJECT;
            } else if (node.nodeType === Node.TEXT_NODE) {
                if (includeInsignificantWhitespace || isSignificantTextContent(/**@type{!Text}*/(node))) {
                    // Text nodes should only be returned if they are
                    // fully contained within the range.
                    result = NodeFilter.FILTER_ACCEPT;
                }
            } else if (isAnchoredAsCharacterElement(node)) {
                // Character elements should only be returned if they are
                // fully contained within the range.
                result = NodeFilter.FILTER_ACCEPT;
            } else if (isTextContentContainingNode(node) || isGroupingElement(node)) {
                result = NodeFilter.FILTER_SKIP;
            }
            return result;
        }

        /*jslint bitwise:true*/
        elements = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT);
        /*jslint bitwise:false*/
        if (!includePartial) {
            removePartiallyContainedNodes(range, elements);
        }

        return elements;
    }
    this.getTextElements = getTextElements;

    /**
     * Crawl parent nodes starting at the startContainer until a matching node is found,
     * or the first inline root is met. If a node is accepted by the supplied filter, and is
     * not already the first element in the elements array, this node is prepended to the start
     * of the elements array.
     *
     * @param {!Node} startContainer Container to start search from (inclusive)
     * @param {!Array.<!Node>} elements Existing elements already discovered
     * @param {!function(!Node):!boolean} filter
     */
    function prependParentContainers(startContainer, elements, filter) {
        var container = startContainer;
        while (container) {
            if (filter(container)) {
                if (elements[0] !== container) {
                    elements.unshift(container);
                }
                break;
            }
            if (isInlineRoot(container)) {
                break;
            }
            container = container.parentNode;
        }
    }

    /**
     * Get all paragraph elements that intersect the supplied range in document
     * order.
     *
     * For example, given the following fragment, with the range starting at b,
     * and ending at c:
     *      <text:p id="A">ab</text:p><text:p id="B"><text:s/>cd</text:p>
     * this function would return the following array:
     *      [text:p{id="A"}, text:p{id="B"}]
     * @param {!Range} range
     * @return {!Array.<!Element>}
     */
    this.getParagraphElements = function (range) {
        var elements;
        /**
         * @param {!Node} node
         * @return {number}
         */
        function nodeFilter(node) {
            var result = NodeFilter.FILTER_REJECT;
            if (isParagraph(node)) {
                result = NodeFilter.FILTER_ACCEPT;
            } else if (isTextContentContainingNode(node) || isGroupingElement(node)) {
                result = NodeFilter.FILTER_SKIP;
            }
            return result;
        }

        elements = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT);
        // getNodesInRange will only return nodes it enters during the iteration.
        // However, we desire all paragraph nodes either contained OR containing this range,
        // so we crawl the parentNodes of the start container until a root is found.

        // Note, this isn't necessary for the end container because iteration crosses the
        // node boundary when entering towards the end container, meaning all paragraphs in
        // the end container's parentNodes will be reported by getNodesInRange.
        prependParentContainers(/**@type{!Node}*/(range.startContainer), elements, isParagraph);
        return elements;
    };

    /**
     * Get all image elements that are fully contained within the supplied
     * range in document order.
     * @param {!Range} range
     * @return {!Array.<Node>}
     */
    this.getImageElements = function (range) {
        var elements;

        /**
         * @param {!Node} node
         * @return {number}
         */
        function nodeFilter(node) {
            var result = NodeFilter.FILTER_SKIP;
            if (isImage(node)) {
                result = NodeFilter.FILTER_ACCEPT;
            }
            return result;
        }

        elements = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT);
        // See description in getParagraphElements as to why this is necessary
        // Short summary: want to include images that completely contain this range
        prependParentContainers(/**@type{!Node}*/(range.startContainer), elements, isImage);
        return elements;
    };

    /**
     * Returns the deepest first descendant of the node right after the
     * given point.
     * NOTE(review): the child picked is childNodes[offset + 1] rather than
     * childNodes[offset]; kept as found, confirm this is intended.
     * @param {!Node} container
     * @param {!number} offset
     * @return {?Node} null if nothing follows the given point in the tree
     */
    function getRightNode(container, offset) {
        var node = container;
        if (offset < node.childNodes.length - 1) {
            node = /** @type {!Node} */(node.childNodes[offset + 1]);
        } else {
            while (node && !node.nextSibling) {
                node = node.parentNode;
            }
            if (!node) {
                // climbed past the root: there is no node to the right
                return null;
            }
            node = node.nextSibling;
        }
        while (node.firstChild) {
            node = node.firstChild;
        }
        return node;
    }

    /**
     * Get all hyperlink elements that intersect the supplied range in document order
     *
     * For example, given the following fragment, with the range starting at b, and ending at c:
     *      <text:a xlink:href="google">ab</text:a><text:a xlink:href="apple">cd</text:a>
     * this function would return the following array:
     *      [text:a{xlink:href="google"}, text:a{xlink:href="apple"}]
     * @param {!Range} range
     * @return {!Array.<Node>}
     */
    this.getHyperlinkElements = function (range) {
        var links = [],
            newRange = /** @type {!Range}*/(range.cloneRange()),
            node,
            textNodes;

        // A collapsed range in an element selects no text, so widen it to
        // cover the first character of the text node to its right
        if (range.collapsed && range.endContainer.nodeType === Node.ELEMENT_NODE) {
            node = getRightNode(range.endContainer, range.endOffset);
            if (node && node.nodeType === Node.TEXT_NODE) {
                // clamp the offset: setEnd throws if it exceeds the node length
                newRange.setEnd(node, Math.min(1, /**@type{!Text}*/(node).length));
            }
        }

        textNodes = getTextElements(newRange, true, false);
        textNodes.forEach(function (node) {
            var parent = node.parentNode;
            // stop at the paragraph, or at the tree root for nodes outside any paragraph
            while (parent && !isParagraph(parent)) {
                if (isHyperlink(parent) && links.indexOf(parent) === -1) {
                    links.push(parent);
                    break;
                }
                parent = parent.parentNode;
            }
        });
        newRange.detach();
        return links;
    };

    /**
     * Normalize the font-family name as defined in
     * http://www.w3.org/TR/2008/REC-CSS2-20080411/fonts.html#propdef-font-family
     * (see there text behind: "There are two types of font family names: <family-name>")
     * @param {!string} fontFamilyName
     * @return {!string}
     */
    /*jslint regexp: true*/
    this.getNormalizedFontFamilyName = function (fontFamilyName) {
        // not quoted with either single- or double-quotes?
        // (\n & \r are syntactically okay as whitespaces, so need to be accepted as well)
        //     ^(["'])        -> match either " or ' at begin (and store match)
        //     (?:.|[\n\r])*? -> match non-greedy any number of any char or \r and \n
        //     \1$            -> match content of first match at end
        if (!(/^(["'])(?:.|[\n\r])*?\1$/).test(fontFamilyName)) {
            // remove any whitespaces at begin and end of full name (ignore internal yet)
            //     ^[ \t\r\n\f]*    -> match whitespace at begin
            //     ((?:.|[\n\r])*?) -> match non-greedy any number of any char or \r and \n (and store match)
            //     [ \t\r\n\f]*$    -> match whitespace at end
            fontFamilyName = fontFamilyName.replace(/^[ \t\r\n\f]*((?:.|[\n\r])*?)[ \t\r\n\f]*$/, "$1");
            // if there is any internal whitespace, reduce it to just one normal whitespace per group
            // and add quotes around the full name
            // (quotes should be only added if there is whitespace inside, as the passed fontFamilyName could
            // be a generic-family one, which must not be quoted)
            if ((/[ \t\r\n\f]/).test(fontFamilyName)) {
                fontFamilyName = "'" + fontFamilyName.replace(/[ \t\r\n\f]+/g, " ") + "'";
            }
        }
        return fontFamilyName;
    };
    /*jslint regexp: false*/
};

/**
 * @type {!odf.OdfUtilsImpl}
 */
odf.OdfUtils = new odf.OdfUtilsImpl();