1 /**
  2  * Copyright (C) 2012-2013 KO GmbH <copyright@kogmbh.com>
  3  *
  4  * @licstart
  5  * This file is part of WebODF.
  6  *
  7  * WebODF is free software: you can redistribute it and/or modify it
  8  * under the terms of the GNU Affero General Public License (GNU AGPL)
  9  * as published by the Free Software Foundation, either version 3 of
 10  * the License, or (at your option) any later version.
 11  *
 12  * WebODF is distributed in the hope that it will be useful, but
 13  * WITHOUT ANY WARRANTY; without even the implied warranty of
 14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15  * GNU Affero General Public License for more details.
 16  *
 17  * You should have received a copy of the GNU Affero General Public License
 18  * along with WebODF.  If not, see <http://www.gnu.org/licenses/>.
 19  * @licend
 20  *
 21  * @source: http://www.webodf.org/
 22  * @source: https://github.com/kogmbh/WebODF/
 23  */
 24 
 25 /*global Node, runtime, odf, NodeFilter, core*/
 26 
 27 /**
 28  * @constructor
 29  */
 30 odf.OdfUtilsImpl = function OdfUtilsImpl() {
 31     "use strict";
 32 
    // Closure-level constants and helpers shared by every function below.
    var /**@const
           @type{!string}*/
        textns = odf.Namespaces.textns,
        /**@const
           @type{!string}*/
        drawns = odf.Namespaces.drawns,
        /**@const
           @type{!string}*/
        xlinkns = odf.Namespaces.xlinkns,
        // Shared DOM helper; used below for range containment tests and for
        // collecting the nodes inside a range.
        domUtils = core.DomUtils,
        // Namespaces whose elements count as ODF nodes (see isODFNode).
        // Despite the "Map" suffix this is a plain array searched with indexOf.
        // only add odf element namespaces here.
        // Namespaces solely used for attributes are excluded. eg. fo, xlink & xml
        odfNodeNamespaceMap = [
            odf.Namespaces.dbns,
            odf.Namespaces.dcns,
            odf.Namespaces.dr3dns,
            odf.Namespaces.drawns,
            odf.Namespaces.chartns,
            odf.Namespaces.formns,
            odf.Namespaces.numberns,
            odf.Namespaces.officens,
            odf.Namespaces.presentationns,
            odf.Namespaces.stylens,
            odf.Namespaces.svgns,
            odf.Namespaces.tablens,
            odf.Namespaces.textns
        ],
        // Schema lookup used below to classify elements as text containers
        // or fields.
        odfSchema = odf.OdfSchema;
 61 
 62     /**
 63      * Determine if the node is a draw:image element.
 64      * @param {?Node} e
 65      * @return {!boolean}
 66      */
 67     function isImage(e) {
 68         var name = e && e.localName;
 69         return name === "image" && e.namespaceURI === drawns;
 70     }
 71     this.isImage = isImage;
 72 
 73     /**
 74      * Determine if the node is a draw:frame element and has its text:anchor-type attribute set to 'as-char'.
 75      * @param {?Node} e
 76      * @return {!boolean}
 77      */
 78     function isCharacterFrame(e) {
 79         // TODO the anchor-type can be defined on any style associated with the frame
 80         return e !== null && e.nodeType === Node.ELEMENT_NODE
 81             && e.localName === "frame" && e.namespaceURI === drawns
 82             && /**@type{!Element}*/(e).getAttributeNS(textns, "anchor-type")
 83                 === "as-char";
 84     }
 85     this.isCharacterFrame = isCharacterFrame;
 86 
 87     /**
 88      * Determine if the node is an office:annotation element.
 89      * @param {?Node} e
 90      * @return {!boolean}
 91      */
 92     function isAnnotation(e) {
 93         var name = e && e.localName;
 94         return name === "annotation" && e.namespaceURI === odf.Namespaces.officens;
 95     }
 96 
 97     /**
 98      * Determine if the node is an annotation wrapper element.
 99      * @param {?Node} e
100      * @return {!boolean}
101      */
102     function isAnnotationWrapper(e) {
103         var name = e && e.localName;
104         return name === "div" && /**@type{!HTMLDivElement}*/(e).className === "annotationWrapper";
105     }
106 
107     /**
108      * Determine if the node is an inline 'root' type,
     * i.e. an office:annotation or a wrapper for an annotation.
110      * @param {?Node} e
111      * @return {!boolean}
112      */
113     function isInlineRoot(e) {
114         return isAnnotation(e)
115             || isAnnotationWrapper(e);
116     }
117     this.isInlineRoot = isInlineRoot;
118 
119     /**
120      * Determine if the node is a text:span element.
121      * @param {?Node} e
122      * @return {!boolean}
123      */
124     this.isTextSpan = function (e) {
125         var name = e && e.localName;
126         return name === "span" && e.namespaceURI === textns;
127     };
128 
129     /**
130      * Determine if the node is a text:a element.
131      * @param {?Node} node
132      * @return {!boolean}
133      */
134     function isHyperlink(node) {
135         var name = node && node.localName;
136         return name === "a" && node.namespaceURI === textns;
137     }
138     this.isHyperlink = isHyperlink;
139 
140     /**
141      * Gets the href attribute of text:a element
142      * @param {!Element} element
143      * @return {!string}
144      */
145     this.getHyperlinkTarget = function (element) {
146         return element.getAttributeNS(xlinkns, 'href') || "";
147     };
148 
149     /**
150      * Determine if the node is a text:p or a text:h element.
151      * @param {?Node} e
152      * @return {!boolean}
153      */
154     function isParagraph(e) {
155         var name = e && e.localName;
156         return (name === "p" || name === "h") && e.namespaceURI === textns;
157     }
158     this.isParagraph = isParagraph;
159 
160     /**
161      * Find the paragraph containing the specified node. If an offset is provided and
162      * the node has a child at the specified offset, this will be included in the search
163      * as well if the supplied node is not a paragraph itself.
164      * @param {?Node} node
165      * @param {!number=} offset
166      * @return {?Element}
167      */
168     function getParagraphElement(node, offset) {
169         if (node && offset !== undefined && !isParagraph(node) && node.childNodes.item(offset)) {
170             node = node.childNodes.item(offset);
171         }
172         while (node && !isParagraph(node)) {
173             node = node.parentNode;
174         }
175         return /**@type{?Element}*/(node);
176     }
177     this.getParagraphElement = getParagraphElement;
178 
179     /**
180      * @param {?Node} node  Node to start searching with
181      * @param {!Element} container  Root container to stop searching at.
182      * @return {?Element}
183      */
184     function getParentAnnotation(node, container) {
185          while (node && node !== container) {
186             if (node.namespaceURI === odf.Namespaces.officens && node.localName === 'annotation') {
187                 return /**@type{!Element}*/(node);
188             }
189             node = node.parentNode;
190         }
191         return null;
192     }
193     this.getParentAnnotation = getParentAnnotation;
194 
195     /**
196      * @param {?Node} node  Node to start searching with
197      * @param {!Element} container  Root container to stop searching at.
198      * @return {!boolean}
199      */
200     this.isWithinAnnotation = function (node, container) {
201         return Boolean(getParentAnnotation(node, container));
202     };
203 
204     /**
205      * Gets the creator of an annotation.
206      * @param {!Element} annotationElement
207      * @return {!string}
208      */
209     this.getAnnotationCreator = function (annotationElement) {
210         var creatorElement = /**@type{!Element}*/(annotationElement.getElementsByTagNameNS(odf.Namespaces.dcns, "creator")[0]);
211         return creatorElement.textContent;
212     };
213 
214     /**
215      * Determine if the node is a text:list-item element.
216      * @param {?Node} e
217      * @return {!boolean}
218      */
219     this.isListItem = function (e) {
220         var name = e && e.localName;
221         return name === "list-item" && e.namespaceURI === textns;
222     };
223 
224     /**
225      * Determine if the node is a text:line-break element.
226      * @param {?Node} e
227      * @return {!boolean}
228      */
229     this.isLineBreak = function (e) {
230         var name = e && e.localName;
231         return name === "line-break" && e.namespaceURI === textns;
232     };
233 
234     /**
235      * Determine if the text consists entirely of whitespace characters.
236      * At least one whitespace is required.
237      * @param {!string} text
238      * @return {!boolean}
239      */
240     function isODFWhitespace(text) {
241         return (/^[ \t\r\n]+$/).test(text);
242     }
243     this.isODFWhitespace = isODFWhitespace;
244 
245     /**
246      * Determine if the node is a grouping element.
247      * @param {?Node} n
248      * @return {!boolean}
249      */
250     function isGroupingElement(n) {
251         if (n === null || n.nodeType !== Node.ELEMENT_NODE) {
252             return false;
253         }
254         var e = /**@type{!Element}*/(n),
255             localName = e.localName;
256         return odfSchema.isTextContainer(e.namespaceURI, localName)
257                || (localName === "span" && e.className === "webodf-annotationHighlight");
258     }
259     this.isGroupingElement = isGroupingElement;
260 
261     /**
262      * @param {?Node} n
263      * @return {!boolean}
264      */
265     function isFieldElement(n) {
266         if (n === null || n.nodeType !== Node.ELEMENT_NODE) {
267             return false;
268         }
269         var e = /**@type{!Element}*/(n),
270             localName = e.localName;
271         return odfSchema.isField(e.namespaceURI, localName);
272     }
273     this.isFieldElement = isFieldElement;
274 
275     /**
276      * Determine if the node is a character element,
277      * namely "s", "tab", or "line-break".
278      * @param {?Node} e
279      * @return {!boolean}
280      */
281     function isCharacterElement(e) {
282         var n = e && e.localName,
283             ns,
284             r = false;
285         if (n) {
286             ns = e.namespaceURI;
287             if (ns === textns) {
288                 r = n === "s" || n === "tab" || n === "line-break";
289             }
290         }
291         return r;
292     }
293     this.isCharacterElement = isCharacterElement;
294     /**
295      * Determine if the node is an 'as char' type of element,
296      * i.e. any element which behaves like a character with
     * respect to its surrounding positions, such as the
298      * space/tab/line-break elements, draw:frames with
299      * anchor type being 'as-char', or inline root elements
300      * such as annotations.
301      * @param {?Node} e
302      * @return {!boolean}
303      */
304     function isAnchoredAsCharacterElement(e) {
305         return isCharacterElement(e) || isFieldElement(e) || isCharacterFrame(e) || isInlineRoot(e);
306     }
307     this.isAnchoredAsCharacterElement = isAnchoredAsCharacterElement;
308     /**
309      * Determine if the node is a <text:s/> character element.
310      * @param {?Node} e
311      * @return {!boolean}
312      */
313     function isSpaceElement(e) {
314         var n = e && e.localName,
315             ns,
316             r = false;
317         if (n) {
318             ns = e.namespaceURI;
319             if (ns === textns) {
320                 r = n === "s";
321             }
322         }
323         return r;
324     }
325     this.isSpaceElement = isSpaceElement;
326 
327     /**
328      * Returns true if the given node is an odf node
329      * @param {!Node} node
330      * @return {!boolean}
331      */
332     function isODFNode(node) {
333         return odfNodeNamespaceMap.indexOf(node.namespaceURI) !== -1;
334     }
335     this.isODFNode = isODFNode;
336 
337     /**
338      * Returns true if the supplied node contains no text-in-ODF, or ODF elements
339      * @param {!Node} node
340      * @return {!boolean}
341      */
342     function hasNoODFContent(node) {
343         var childNode;
344         if (isCharacterElement(node) || isFieldElement(node)) {
345             return false;
346         }
347         if (isGroupingElement(/**@type{!Node}*/(node.parentNode)) && node.nodeType === Node.TEXT_NODE) {
348             return node.textContent.length === 0;
349         }
350         childNode = node.firstChild;
351         while (childNode) {
352             if (isODFNode(childNode) || !hasNoODFContent(childNode)) {
353                 return false;
354             }
355             childNode = childNode.nextSibling;
356         }
357         return true;
358     }
359     this.hasNoODFContent= hasNoODFContent;
360 
361     /**
362      * @param {!Node} node
363      * @return {!Node}
364      */
365     function firstChild(node) {
366         while (node.firstChild !== null && isGroupingElement(node)) {
367             node = node.firstChild;
368         }
369         return node;
370     }
371     this.firstChild = firstChild;
372     /**
373      * @param {!Node} node
374      * @return {!Node}
375      */
376     function lastChild(node) {
377         while (node.lastChild !== null && isGroupingElement(node)) {
378             node = node.lastChild;
379         }
380         return node;
381     }
382     this.lastChild = lastChild;
383     /**
384      * @param {!Node} node
385      * @return {?Node}
386      */
387     function previousNode(node) {
388         while (!isParagraph(node) && node.previousSibling === null) {
389             node = /**@type{!Node}*/(node.parentNode);
390         }
391         return isParagraph(node) ? null : lastChild(/**@type{!Node}*/(node.previousSibling));
392     }
393     this.previousNode = previousNode;
394     /**
395      * @param {!Node} node
396      * @return {?Node}
397      */
398     function nextNode(node) {
399         while (!isParagraph(node) && node.nextSibling === null) {
400             node = /**@type{!Node}*/(node.parentNode);
401         }
402         return isParagraph(node) ? null : firstChild(/**@type{!Node}*/(node.nextSibling));
403     }
404     this.nextNode = nextNode;
405 
406     /**
407      * Walk to the left along the DOM and return true if the first thing
408      * encountered is either a non-whitespace text character or a non-space
409      * character element (i.e., any character element other than <text:s/>).
410      * Walking goes through grouping elements.
411      * @param {?Node} node the first node to scan
412      * @return {!boolean}
413      */
414     function scanLeftForNonSpace(node) {
415         var r = false,
416             text;
417         while (node) {
418             if (node.nodeType === Node.TEXT_NODE) {
419                 text = /**@type{!Text}*/(node);
420                 if (text.length === 0) {
421                     node = previousNode(text);
422                 } else {
423                     return !isODFWhitespace(
424                         text.data.substr(text.length - 1, 1)
425                     );
426                 }
427             } else if (isAnchoredAsCharacterElement(node)) {
428                 r = isSpaceElement(node) === false;
429                 node = null;
430             } else {
431                 node = previousNode(node);
432             }
433         }
434         return r;
435     }
436     this.scanLeftForNonSpace = scanLeftForNonSpace;
437     /**
438      * Walk to the left along the DOM and return the type of the first
439      * thing encountered.
440      * 0 none of the below
441      * 1 non-whitespace character or a character element
442      * 2 whitespace character that is preceded by a non-whitespace character
443      *   or a character element
444      *
445      * @param {!Node} node the first node to scan
446      * @return {!number}
447      */
448     function lookLeftForCharacter(node) {
449         var text, r = 0, tl = 0;
450         if (node.nodeType === Node.TEXT_NODE) {
451             tl = /**@type{!Text}*/(node).length;
452         }
453         if (tl > 0) {
454             text = /**@type{!Text}*/(node).data;
455             if (!isODFWhitespace(text.substr(tl - 1, 1))) {
456                 r = 1; // character found
457             } else if (tl === 1) {
458                 r = scanLeftForNonSpace(previousNode(node)) ? 2 : 0;
459             } else {
460                 r = isODFWhitespace(text.substr(tl - 2, 1)) ? 0 : 2;
461             }
462         } else if (isAnchoredAsCharacterElement(node)) {
463             r = 1;
464         }
465         return r;
466     }
467     this.lookLeftForCharacter = lookLeftForCharacter;
468     /**
469      * Look to the right along the DOM and return true if the first thing
470      * encountered is either a non-whitespace character or a character
471      * element.
472      *
473      * @param {?Node} node the first node to scan
474      * @return {!boolean}
475      */
476     function lookRightForCharacter(node) {
477         var r = false,
478             l = 0;
479         if (node && node.nodeType === Node.TEXT_NODE) {
480             l = /**@type{!Text}*/(node).length;
481         }
482         if (l > 0) {
483             r = !isODFWhitespace(/**@type{!Text}*/(node).data.substr(0, 1));
484         } else if (isAnchoredAsCharacterElement(node)) {
485             r = true;
486         }
487         return r;
488     }
489     this.lookRightForCharacter = lookRightForCharacter;
490     /**
491      * Walk to the left along the DOM and return true if either a
492      * non-whitespace character or a character element is encountered.
493      *
494      * @param {?Node} node the first node to scan
495      * @return {!boolean}
496      */
497     function scanLeftForAnyCharacter(node) {
498         var r = false, l;
499         node = node && lastChild(node);
500         while (node) {
501             if (node.nodeType === Node.TEXT_NODE) {
502                 l = /**@type{!Text}*/(node).length;
503             } else {
504                 l = 0;
505             }
506             if (l > 0 && !isODFWhitespace(/**@type{!Text}*/(node).data)) {
507                 r = true;
508                 break;
509             }
510             if (isAnchoredAsCharacterElement(node)) {
511                 r = true;
512                 break;
513             }
514             node = previousNode(node);
515         }
516         return r;
517     }
518     this.scanLeftForAnyCharacter = scanLeftForAnyCharacter;
519     /**
520      * Walk to the right along the DOM and return true if either a
521      * non-whitespace character or a character element is encountered.
522      *
523      * @param {?Node} node the first node to scan
524      * @return {!boolean}
525      */
526     function scanRightForAnyCharacter(node) {
527         var r = false, l;
528         node = node && firstChild(node);
529         while (node) {
530             if (node.nodeType === Node.TEXT_NODE) {
531                 l = /**@type{!Text}*/(node).length;
532             } else {
533                 l = 0;
534             }
535             if (l > 0 && !isODFWhitespace(/**@type{!Text}*/(node).data)) {
536                 r = true;
537                 break;
538             }
539             if (isAnchoredAsCharacterElement(node)) {
540                 r = true;
541                 break;
542             }
543             node = nextNode(node);
544         }
545         return r;
546     }
547     this.scanRightForAnyCharacter = scanRightForAnyCharacter;
548 
549     /**
550      * check if the node is part of the trailing whitespace
551      * @param {!Text} textnode
552      * @param {!number} offset
553      * @return {!boolean}
554      */
555     function isTrailingWhitespace(textnode, offset) {
556         if (!isODFWhitespace(textnode.data.substr(offset))) {
557             return false;
558         }
559         return !scanRightForAnyCharacter(nextNode(textnode));
560     }
561     this.isTrailingWhitespace = isTrailingWhitespace;
562 
563     /**
564      * Takes a textNode and an offset, and returns true if the character
565      * at that offset is a significant whitespace.
566      *
567      * Significant whitespace is defined as:
568      * - Not part of the leading whitespace block in a paragraph
569      * - Not part of the trailing whitespace block in a paragraph
570      * - The first whitespace character after a text node or character
571      *
572      * All other whitespace elements are considered insignificant
573      * @param {!Text} textNode
574      * @param {!number} offset
575      * @return {!boolean}
576      */
577     function isSignificantWhitespace(textNode, offset) {
578         var text = textNode.data,
579             result;
580 
581         if (!isODFWhitespace(text[offset])) {
582             // Character is not whitespace
583             return false;
584         }
585 
586         if (isAnchoredAsCharacterElement(textNode.parentNode)) {
587             // Parent is a character element, and therefore does not actually contain text
588             // This prevents a space element from being upgraded again
589             return false;
590         }
591 
592         if (offset > 0) {
593             if (!isODFWhitespace(text[offset - 1])) {
594                 // First whitespace after a character is significant
595                 result = true;
596             }
597         } else if (scanLeftForNonSpace(previousNode(textNode))) {
598             // If the first character found scanning to the left is non-whitespace, this might still be significant
599             result = true;
600         }
601 
602         if (result === true) {
603             return isTrailingWhitespace(textNode, offset)
604                 ? false : true;
605         }
606         return false;
607     }
608     this.isSignificantWhitespace = isSignificantWhitespace;
609 
610     /**
611      * Returns true if the supplied node is a downgradeable space element.
612      * As per http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#element-text_s
613      * a downgradeable whitespace element is a space element that is immediately preceded by something other than a space
614      * and has at least one non-space character after it
615      * @param {!Node} node
616      * @return {!boolean}
617      */
618     this.isDowngradableSpaceElement = function(node) {
619         if (isSpaceElement(node)) {
620             return scanLeftForNonSpace(previousNode(node)) && scanRightForAnyCharacter(nextNode(node));
621         }
622         return false;
623     };
624 
625     /**
626      * Returns the length split as value and unit, from an ODF attribute
627      * @param {?string|undefined} length
628      * @return {?{value:!number,unit:!string}}
629      */
630     function parseLength(length) {
631         var re = /(-?[0-9]*[0-9][0-9]*(\.[0-9]*)?|0+\.[0-9]*[1-9][0-9]*|\.[0-9]*[1-9][0-9]*)((cm)|(mm)|(in)|(pt)|(pc)|(px)|(%))/,
632             m = re.exec(length);
633         if (!m) {
634             return null;
635         }
636         return {value: parseFloat(m[1]), unit: m[3]};
637     }
638     this.parseLength = parseLength;
639 
640     /**
641      * Returns the value and unit of the length, if it is positive ( > 0)
642      * @param {?string|undefined} length
643      * @return {?{value:!number,unit:!string}}
644      */
645     function parsePositiveLength(length) {
646         var result = parseLength(length);
647         if (result && (result.value <= 0 || result.unit === '%')) {
648             return null;
649         }
650         return result;
651     }
652 
653     /**
654      * Returns the value and unit of the length, if it is non-negative ( >= 0)
655      * @param {?string|undefined} length
656      * @return {?{value:!number,unit:!string}}
657      */
658     function parseNonNegativeLength(length) {
659         var result = parseLength(length);
660         if (result && (result.value < 0 || result.unit === '%')) {
661             return null;
662         }
663         return result;
664     }
665     this.parseNonNegativeLength = parseNonNegativeLength;
666 
667     /**
668      * Returns the value and unit(%) of the length, if it is specified in %age
669      * @param {?string|undefined} length
670      * @return {?{value:!number,unit:!string}}
671      */
672     function parsePercentage(length) {
673         var result = parseLength(length);
674         if (result && (result.unit !== '%')) {
675             return null;
676         }
677         return result;
678     }
679 
680     /**
681      * Returns the value and unit of the font size, in conformance with fo:font-size
682      * constraints
683      * @param {?string|undefined} fontSize
684      * @return {?{value:!number,unit:!string}}
685      */
686     function parseFoFontSize(fontSize) {
687         return parsePositiveLength(fontSize) || parsePercentage(fontSize);
688     }
689     this.parseFoFontSize = parseFoFontSize;
690 
691     /**
692      * Returns the value and unit of the line height, in conformance with fo:line-height
693      * constraints
694      * @param {?string|undefined} lineHeight
695      * @return {?{value:!number,unit:!string}}
696      */
697     function parseFoLineHeight(lineHeight) {
698         return parseNonNegativeLength(lineHeight) || parsePercentage(lineHeight);
699     }
700     this.parseFoLineHeight = parseFoLineHeight;
701 
702     /**
703      * Adapted from instructions on how to generate plain text from an ODT document.
704      * See algorithm at http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1415196_253892949
705      * @param {!Node} node
706      * @return {boolean}
707      */
708     function isTextContentContainingNode(node) {
709         switch (node.namespaceURI) {
710             // Namespace skips
711         case odf.Namespaces.drawns:
712         case odf.Namespaces.svgns:
713         case odf.Namespaces.dr3dns:
714             return false;
715         case odf.Namespaces.textns:
716             // Specific node type skips
717             //noinspection FallthroughInSwitchStatementJS
718             switch (node.localName) {
719             case 'note-body':
720             case 'ruby-text':
721                 return false;
722             }
723             break;
724         case odf.Namespaces.officens:
725             // Specific node type skips
726             //noinspection FallthroughInSwitchStatementJS
727             switch (node.localName) {
728             case 'annotation':
729             case 'binary-data':
730             case 'event-listeners':
731                 return false;
732             }
733             break;
734         default:
735             // Skip webodf edit markers or cursor information
736             switch (node.localName) {
737             case 'cursor':
738             case 'editinfo':
739                 return false;
740             }
741             break;
742         }
743         return true;
744     }
745     this.isTextContentContainingNode = isTextContentContainingNode;
746 
747     /**
748      * Returns true if the text node is within a paragraph and contains either non-whitespace characters, or
749      * significant whitespace characters (as defined by the ODF standards).
750      * WARNING this method is likely to be quite slow, so should be used as little as possible
751      * @param {!Text} textNode
752      * @return {!boolean}
753      */
754     function isSignificantTextContent(textNode) {
755         return Boolean(getParagraphElement(textNode)
756             && (!isODFWhitespace(textNode.textContent) || isSignificantWhitespace(textNode, 0)));
757     }
758 
759     /**
760      * Remove any nodes that aren't fully contained within the supplied range. This function assumes
761      * the nodes appear in document order.
762      * @param {!Range} range
763      * @param {!Array.<!Node>} nodes
764      * @return {undefined}
765      */
766     function removePartiallyContainedNodes(range, nodes) {
767         while (nodes.length > 0 && !domUtils.rangeContainsNode(range, /**@type{!Node}*/(nodes[0]))) {
768             nodes.shift();
769         }
770         while (nodes.length > 0 && !domUtils.rangeContainsNode(range, /**@type{!Node}*/(nodes[nodes.length - 1]))) {
771             nodes.pop();
772         }
773     }
774 
775     /**
     * Returns an array of text nodes considered to be part of the supplied range.
777      * This will exclude elements that are not part of the ODT main text body,
778      * as well as insignificant whitespace text nodes.
779      * @param {!Range} range    Range to search for nodes within
780      * @param {boolean} includePartial Include partially intersecting text nodes
781      *                                 in the result.
782      * @return {!Array.<!Node>}
783      */
784     function getTextNodes(range, includePartial) {
785         var textNodes;
786 
787         /**
788          * @param {!Node} node
789          * @return {number}
790          */
791         function nodeFilter(node) {
792             var result = NodeFilter.FILTER_REJECT;
793             if (node.nodeType === Node.TEXT_NODE) {
794                 if (isSignificantTextContent(/**@type{!Text}*/(node))) {
795                     result = NodeFilter.FILTER_ACCEPT;
796                 }
797             } else if (isTextContentContainingNode(node)) {
798                 result = NodeFilter.FILTER_SKIP;
799             }
800             return result;
801         }
802 
803         /*jslint bitwise:true*/
804         textNodes = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT|NodeFilter.SHOW_TEXT);
805         /*jslint bitwise:false*/
806         if (!includePartial) {
807             removePartiallyContainedNodes(range, textNodes);
808         }
809 
810         return textNodes;
811     }
812     this.getTextNodes = getTextNodes;
813 
814     /**
815      * Get all character elements and text nodes fully contained within the
816      * supplied range in document order.
817      *
818      * For example, given the following fragment, with the range starting at b,
819      * and ending at c:
820      *      <text:p>ab<text:s/>cd</text:p>
821      * this function would return the following array:
822      *      ["b", text:s, "c"]
823      * @param {!Range} range
824      * @param {!boolean} includePartial Include partially intersecting text &
825      *                         character nodes in the result.
826      * @param {!boolean} includeInsignificantWhitespace Include whitespace only
827      *                         nodes that are not considered significant text
828      *                         content. This includes whitespace only elements
829      *                         used in pretty-formatted xml as LibreOffice
830      *                         produces in flat ODT files.
831      * @return {!Array.<!Element|!Text>}
832      */
833     function getTextElements(range, includePartial, includeInsignificantWhitespace) {
834         var elements;
835 
836         /**
837          * @param {!Node} node
838          * @return {number}
839          */
840         function nodeFilter(node) {
841             var result = NodeFilter.FILTER_REJECT;
842             // do not return anything inside an character element or an inline root such as an annotation
843             if (isCharacterElement(node.parentNode) || isFieldElement(node.parentNode) || isInlineRoot(node)) {
844                 result = NodeFilter.FILTER_REJECT;
845             } else if (node.nodeType === Node.TEXT_NODE) {
846                 if (includeInsignificantWhitespace || isSignificantTextContent(/**@type{!Text}*/(node))) {
847                         // Text nodes should only be returned if they are
848                         // fully contained within the range.
849                     result = NodeFilter.FILTER_ACCEPT;
850                 }
851             } else if (isAnchoredAsCharacterElement(node)) {
852                 // Character elements should only be returned if they are
853                 // fully contained within the range.
854                 result =  NodeFilter.FILTER_ACCEPT;
855             } else if (isTextContentContainingNode(node) || isGroupingElement(node)) {
856                 result =  NodeFilter.FILTER_SKIP;
857             }
858             return result;
859         }
860 
861         /*jslint bitwise:true*/
862         elements = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT);
863         /*jslint bitwise:false*/
864         if (!includePartial) {
865             removePartiallyContainedNodes(range, elements);
866         }
867 
868         return elements;
869     }
870     this.getTextElements = getTextElements;
871 
872     /**
873      * Crawl parent nodes starting at the startContainer until a matching node is found,
874      * or the first inline root is met. If a node is accepted by the supplied filter, and is
875      * not already the first element in the elements array, this node is prepended to the start
876      * of the elements array.
877      *
878      * @param {!Node} startContainer Container to start search from (inclusive)
879      * @param {!Array.<!Node>} elements Existing elements already discovered
880      * @param {!function(!Node):!boolean} filter
881      */
882     function prependParentContainers(startContainer, elements, filter) {
883         var container = startContainer;
884         while (container) {
885             if (filter(container)) {
886                 if (elements[0] !== container) {
887                     elements.unshift(container);
888                 }
889                 break;
890             }
891             if (isInlineRoot(container)) {
892                 break;
893             }
894             container = container.parentNode;
895         }
896     }
897 
898     /**
899      * Get all paragraph elements that intersect the supplied range in document
900      * order.
901      *
902      * For example, given the following fragment, with the range starting at b,
903      * and ending at c:
904      *      <text:p id="A">ab</text:p><text:p id="B"><text:s/>cd</text:p>
905      * this function would return the following array:
906      *      [text:p{id="A"}, text:p{id="B"}]
907      * @param {!Range} range
908      * @return {!Array.<!Element>}
909      */
910     this.getParagraphElements = function (range) {
911         var elements;
912         /**
913          * @param {!Node} node
914          * @return {number}
915          */
916         function nodeFilter(node) {
917             var result = NodeFilter.FILTER_REJECT;
918             if (isParagraph(node)) {
919                 result = NodeFilter.FILTER_ACCEPT;
920             } else if (isTextContentContainingNode(node) || isGroupingElement(node)) {
921                 result = NodeFilter.FILTER_SKIP;
922             }
923             return result;
924         }
925 
926         elements = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT);
927         // getNodesInRange will only return nodes it enters during the iteration.
928         // However, we desire all paragraph nodes either contained OR containing this range,
929         // so we crawl the parentNodes of the start container until a root is found.
930 
931         // Note, this isn't necessary for the end container because iteration crosses the
932         // node boundary when entering towards the end container, meaning all paragraphs in
933         // the end container's parentNodes will be reported by getNodesInRange.
934         prependParentContainers(/**@type{!Node}*/(range.startContainer), elements, isParagraph);
935         return elements;
936     };
937 
938     /**
939      * Get all image elements that fully contained within the supplied range in
940      * document order.
941      * @param {!Range} range
942      * @return {!Array.<Node>}
943      */
944     this.getImageElements = function (range) {
945         var elements;
946 
947         /**
948          * @param {!Node} node
949          * @return {number}
950          */
951         function nodeFilter(node) {
952             var result = NodeFilter.FILTER_SKIP;
953             if (isImage(node)) {
954                 result = NodeFilter.FILTER_ACCEPT;
955             }
956             return result;
957         }
958 
959         elements = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT);
960         // See description in getParagraphElements as to why this is necessary
961         // Short summary: want to include images that completely contain this range
962         prependParentContainers(/**@type{!Node}*/(range.startContainer), elements, isImage);
963         return elements;
964     };
965 
966     /**
967      * Returns the node right after the given point.
968      * @param {!Node} container
969      * @param {!number} offset
970      * @return {!Node}
971      */
972     function getRightNode(container, offset) {
973         var node = container;
974         if (offset < node.childNodes.length - 1) {
975             node = /** @type {!Node} */(node.childNodes[offset + 1]);
976         } else {
977             while (!node.nextSibling) {
978                 node = node.parentNode;
979             }
980             node = node.nextSibling;
981         }
982         while (node.firstChild) {
983             node = node.firstChild;
984         }
985         return node;
986     }
987 
988     /**
989      * Get all hyperlink elements that intersect the supplied range in document order
990      *
991      * For example, given the following fragment, with the range starting at b, and ending at c:
992      *      <text:a xlink:href="google">ab</text:a><text:a xlink:href="apple">cd</text:a>
993      * this function would return the following array:
994      *      [text:a{xlink:href="google"}, text:a{xlink:href="apple"}]
995      * @param {!Range} range
996      * @return {!Array.<Node>}
997      */
998     this.getHyperlinkElements = function (range) {
999         var links = [],
1000             newRange = /** @type {!Range}*/(range.cloneRange()),
1001             node,
1002             textNodes;
1003 
1004         if (range.collapsed && range.endContainer.nodeType === Node.ELEMENT_NODE) {
1005             node = getRightNode(range.endContainer, range.endOffset);
1006             if (node.nodeType === Node.TEXT_NODE) {
1007                 newRange.setEnd(node, 1);
1008             }
1009         }
1010 
1011         textNodes = getTextElements(newRange, true, false);
1012         textNodes.forEach(function (node) {
1013             var parent = node.parentNode;
1014             while (!isParagraph(parent)) {
1015                 if (isHyperlink(parent) && links.indexOf(parent) === -1) {
1016                     links.push(parent);
1017                     break;
1018                 }
1019                 parent = parent.parentNode;
1020             }
1021         });
1022         newRange.detach();
1023         return links;
1024     };
1025 
1026     /**
1027      * Normalize the font-family name as defined in
1028      * http://www.w3.org/TR/2008/REC-CSS2-20080411/fonts.html#propdef-font-family
1029      * (see there text behind: "There are two types of font family names: <family-name>")
1030      * @param {!string} fontFamilyName
1031      * @return {!string}
1032      */
1033     /*jslint regexp: true*/
1034     this.getNormalizedFontFamilyName = function(fontFamilyName) {
1035         // not quoted with either single- or double-quotes?
1036         // (\n & \r are syntactically okay as whitespaces, so need to be accepted as well)
1037         //     ^(["'])        -> match either " or ' at begin (and store match)
1038         //     (?:.|[\n\r])*? -> match non-greedy any number of any char or \r and \n
1039         //     \1$            -> match content of first match at end
1040         if (!(/^(["'])(?:.|[\n\r])*?\1$/).test(fontFamilyName)) {
1041             // remove any whitespaces at begin and end of full name (ignore internal yet)
1042             //     ^[ \t\r\n\f]*    -> match whitespace at begin
1043             //     ((?:.|[\n\r])*?) -> match non-greedy any number of any char or \r and \n (and store match)
1044             //     [ \t\r\n\f]*$    -> match whitespace at end
1045             fontFamilyName = fontFamilyName.replace(/^[ \t\r\n\f]*((?:.|[\n\r])*?)[ \t\r\n\f]*$/, "$1");
1046             // if there is any internal whitespace, reduce it to just one normal whitespace per group
1047             // and add quotes around the full name
1048             // (quotes should be only added if there is whitespace inside, as the passed fontFamilyName could
1049             // be a generic-family one, which must not be quoted)
1050             if ((/[ \t\r\n\f]/).test(fontFamilyName)) {
1051                 fontFamilyName = "'" + fontFamilyName.replace(/[ \t\r\n\f]+/g, " ") + "'";
1052             }
1053         }
1054         return fontFamilyName;
1055     };
1056     /*jslint regexp: false*/
1057 };
1058 
/**
 * Instance of odf.OdfUtilsImpl that is created once, when this file is
 * evaluated, and published as odf.OdfUtils.
 * @type {!odf.OdfUtilsImpl}
 */
odf.OdfUtils = new odf.OdfUtilsImpl();