1 /**
  2  * Copyright (C) 2012-2013 KO GmbH <copyright@kogmbh.com>
  3  *
  4  * @licstart
  5  * This file is part of WebODF.
  6  *
  7  * WebODF is free software: you can redistribute it and/or modify it
  8  * under the terms of the GNU Affero General Public License (GNU AGPL)
  9  * as published by the Free Software Foundation, either version 3 of
 10  * the License, or (at your option) any later version.
 11  *
 12  * WebODF is distributed in the hope that it will be useful, but
 13  * WITHOUT ANY WARRANTY; without even the implied warranty of
 14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15  * GNU Affero General Public License for more details.
 16  *
 17  * You should have received a copy of the GNU Affero General Public License
 18  * along with WebODF.  If not, see <http://www.gnu.org/licenses/>.
 19  * @licend
 20  *
 21  * @source: http://www.webodf.org/
 22  * @source: https://github.com/kogmbh/WebODF/
 23  */
 24 
 25 /*global Node, runtime, odf, NodeFilter, core*/
 26 
 27 /**
 28  * @constructor
 29  */
 30 odf.OdfUtils = function OdfUtils() {
 31     "use strict";
 32 
    // Values shared by all helper functions defined in this constructor.
    var /**@const
           @type{!string}*/
        textns = odf.Namespaces.textns,
        /**@const
           @type{!string}*/
        drawns = odf.Namespaces.drawns,
        /**@const
           @type{!string}*/
        xlinkns = odf.Namespaces.xlinkns,
        // DOM helper instance; used further down for rangeContainsNode and getNodesInRange
        domUtils = new core.DomUtils(),
        // only add odf element namespaces here.
        // Namespaces solely used for attributes are excluded. eg. fo, xlink & xml
        // NOTE: despite the "Map" suffix this is a plain array; isODFNode searches it with indexOf.
        odfNodeNamespaceMap = [
            odf.Namespaces.dbns,
            odf.Namespaces.dcns,
            odf.Namespaces.dr3dns,
            odf.Namespaces.drawns,
            odf.Namespaces.chartns,
            odf.Namespaces.formns,
            odf.Namespaces.numberns,
            odf.Namespaces.officens,
            odf.Namespaces.presentationns,
            odf.Namespaces.stylens,
            odf.Namespaces.svgns,
            odf.Namespaces.tablens,
            odf.Namespaces.textns
        ];
 60 
 61     /**
 62      * Determine if the node is a draw:image element.
 63      * @param {?Node} e
 64      * @return {!boolean}
 65      */
 66     function isImage(e) {
 67         var name = e && e.localName;
 68         return name === "image" && e.namespaceURI === drawns;
 69     }
 70     this.isImage = isImage;
 71 
 72     /**
 73      * Determine if the node is a draw:frame element and has its text:anchor-type attribute set to 'as-char'.
 74      * @param {?Node} e
 75      * @return {!boolean}
 76      */
 77     function isCharacterFrame(e) {
 78         // TODO the anchor-type can be defined on any style associated with the frame
 79         return e !== null && e.nodeType === Node.ELEMENT_NODE
 80             && e.localName === "frame" && e.namespaceURI === drawns
 81             && /**@type{!Element}*/(e).getAttributeNS(textns, "anchor-type")
 82                 === "as-char";
 83     }
 84     this.isCharacterFrame = isCharacterFrame;
 85 
 86     /**
 87      * Determine if the node is an office:annotation element.
 88      * @param {?Node} e
 89      * @return {!boolean}
 90      */
 91     function isAnnotation(e) {
 92         var name = e && e.localName;
 93         return name === "annotation" && e.namespaceURI === odf.Namespaces.officens;
 94     }
 95 
 96     /**
 97      * Determine if the node is an annotation wrapper element.
 98      * @param {?Node} e
 99      * @return {!boolean}
100      */
101     function isAnnotationWrapper(e) {
102         var name = e && e.localName;
103         return name === "div" && /**@type{!HTMLDivElement}*/(e).className === "annotationWrapper";
104     }
105 
106     /**
107      * Determine if the node is an inline 'root' type,
108      * i.e. an office:annotation or a wrapper for an annotaiton.
109      * @param {?Node} e
110      * @return {!boolean}
111      */
112     function isInlineRoot(e) {
113         return isAnnotation(e)
114             || isAnnotationWrapper(e);
115     }
116     this.isInlineRoot = isInlineRoot;
117 
118     /**
119      * Determine if the node is a text:span element.
120      * @param {?Node} e
121      * @return {!boolean}
122      */
123     this.isTextSpan = function (e) {
124         var name = e && e.localName;
125         return name === "span" && e.namespaceURI === textns;
126     };
127 
128     /**
129      * Determine if the node is a text:a element.
130      * @param {?Node} node
131      * @return {!boolean}
132      */
133     function isHyperlink(node) {
134         var name = node && node.localName;
135         return name === "a" && node.namespaceURI === textns;
136     }
137     this.isHyperlink = isHyperlink;
138 
139     /**
140      * Gets the href attribute of text:a element
141      * @param {!Element} element
142      * @return {!string}
143      */
144     this.getHyperlinkTarget = function (element) {
145         return element.getAttributeNS(xlinkns, 'href') || "";
146     };
147 
148     /**
149      * Determine if the node is a text:p or a text:h element.
150      * @param {?Node} e
151      * @return {!boolean}
152      */
153     function isParagraph(e) {
154         var name = e && e.localName;
155         return (name === "p" || name === "h") && e.namespaceURI === textns;
156     }
157     this.isParagraph = isParagraph;
158 
159     /**
160      * Find the paragraph containing the specified node. If an offset is provided and
161      * the node has a child at the specified offset, this will be included in the search
162      * as well if the supplied node is not a paragraph itself.
163      * @param {?Node} node
164      * @param {!number=} offset
165      * @return {?Element}
166      */
167     function getParagraphElement(node, offset) {
168         if (node && offset !== undefined && !isParagraph(node) && node.childNodes.item(offset)) {
169             node = node.childNodes.item(offset);
170         }
171         while (node && !isParagraph(node)) {
172             node = node.parentNode;
173         }
174         return /**@type{?Element}*/(node);
175     }
176     this.getParagraphElement = getParagraphElement;
177 
178     /**
179      * Returns true if the specified node is contained within a text:tracked-changes parent
180      * @param {Node} node Node to start searching from
181      * @param {!Node} container Root container to stop searching at. This helps set the boundary of the current
182      *  search and will usually be the root level element node (e.g., office:text, office:presentation, etc.)
183      * @return {!boolean}
184      */
185     this.isWithinTrackedChanges = function (node, container) {
186         while (node && node !== container) {
187             if (node.namespaceURI === textns && node.localName === 'tracked-changes') {
188                 return true;
189             }
190             node = node.parentNode;
191         }
192         return false;
193     };
194 
195     /**
196      * @param {?Node} node  Node to start searching with
197      * @param {!Element} container  Root container to stop searching at.
198      * @return {?Element}
199      */
200     function getParentAnnotation(node, container) {
201          while (node && node !== container) {
202             if (node.namespaceURI === odf.Namespaces.officens && node.localName === 'annotation') {
203                 return /**@type{!Element}*/(node);
204             }
205             node = node.parentNode;
206         }
207         return null;
208     }
209     this.getParentAnnotation = getParentAnnotation;
210 
211     /**
212      * @param {?Node} node  Node to start searching with
213      * @param {!Element} container  Root container to stop searching at.
214      * @return {!boolean}
215      */
216     this.isWithinAnnotation = function (node, container) {
217         return Boolean(getParentAnnotation(node, container));
218     };
219 
220     /**
221      * Gets the creator of an annotation.
222      * @param {!Element} annotationElement
223      * @return {!string}
224      */
225     this.getAnnotationCreator = function (annotationElement) {
226         var creatorElement = /**@type{!Element}*/(annotationElement.getElementsByTagNameNS(odf.Namespaces.dcns, "creator")[0]);
227         return creatorElement.textContent;
228     };
229 
230     /**
231      * Determine if the node is a text:list-item element.
232      * @param {?Node} e
233      * @return {!boolean}
234      */
235     this.isListItem = function (e) {
236         var name = e && e.localName;
237         return name === "list-item" && e.namespaceURI === textns;
238     };
239 
240     /**
241      * Determine if the node is a text:line-break element.
242      * @param {?Node} e
243      * @return {!boolean}
244      */
245     this.isLineBreak = function (e) {
246         var name = e && e.localName;
247         return name === "line-break" && e.namespaceURI === textns;
248     };
249 
250     /**
251      * Determine if the text consists entirely of whitespace characters.
252      * At least one whitespace is required.
253      * @param {!string} text
254      * @return {!boolean}
255      */
256     function isODFWhitespace(text) {
257         return (/^[ \t\r\n]+$/).test(text);
258     }
259     this.isODFWhitespace = isODFWhitespace;
260 
261     /**
262      * Determine if the node is a grouping element.
263      * @param {?Node} n
264      * @return {!boolean}
265      */
266     function isGroupingElement(n) {
267         if (n === null || n.nodeType !== Node.ELEMENT_NODE) {
268             return false;
269         }
270         var e = /**@type{!Element}*/(n),
271             localName = e.localName;
272         return (/^(span|p|h|a|meta)$/.test(localName)
273                 && e.namespaceURI === textns)
274                || (localName === "span"
275                    && e.className === "webodf-annotationHighlight");
276     }
277     this.isGroupingElement = isGroupingElement;
278     /**
279      * Determine if the node is a character element,
280      * namely "s", "tab", or "line-break".
281      * @param {?Node} e
282      * @return {!boolean}
283      */
284     function isCharacterElement(e) {
285         var n = e && e.localName,
286             ns,
287             r = false;
288         if (n) {
289             ns = e.namespaceURI;
290             if (ns === textns) {
291                 r = n === "s" || n === "tab" || n === "line-break";
292             }
293         }
294         return r;
295     }
296     this.isCharacterElement = isCharacterElement;
297     /**
298      * Determine if the node is an 'as char' type of element,
299      * i.e. any element which behaves like a character with
300      * respect to it's surrounding positions, such as the
301      * space/tab/line-break elements, draw:frames with
302      * anchor type being 'as-char', or inline root elements
303      * such as annotations.
304      * @param {?Node} e
305      * @return {!boolean}
306      */
307     function isAnchoredAsCharacterElement(e) {
308         return isCharacterElement(e) || isCharacterFrame(e) || isInlineRoot(e);
309     }
310     this.isAnchoredAsCharacterElement = isAnchoredAsCharacterElement;
311     /**
312      * Determine if the node is a <text:s/> character element.
313      * @param {?Node} e
314      * @return {!boolean}
315      */
316     function isSpaceElement(e) {
317         var n = e && e.localName,
318             ns,
319             r = false;
320         if (n) {
321             ns = e.namespaceURI;
322             if (ns === textns) {
323                 r = n === "s";
324             }
325         }
326         return r;
327     }
328     this.isSpaceElement = isSpaceElement;
329 
330     /**
331      * Returns true if the given node is an odf node
332      * @param {!Node} node
333      * @return {!boolean}
334      */
335     function isODFNode(node) {
336         return odfNodeNamespaceMap.indexOf(node.namespaceURI) !== -1;
337     }
338     this.isODFNode = isODFNode;
339 
340     /**
341      * Returns true if the supplied node contains no text-in-ODF, or ODF elements
342      * @param {!Node} node
343      * @return {!boolean}
344      */
345     function hasNoODFContent(node) {
346         var childNode;
347         if (isCharacterElement(node)) {
348             return false;
349         }
350         if (isGroupingElement(/**@type{!Node}*/(node.parentNode)) && node.nodeType === Node.TEXT_NODE) {
351             return node.textContent.length === 0;
352         }
353         childNode = node.firstChild;
354         while (childNode) {
355             if (isODFNode(childNode) || !hasNoODFContent(childNode)) {
356                 return false;
357             }
358             childNode = childNode.nextSibling;
359         }
360         return true;
361     }
362     this.hasNoODFContent= hasNoODFContent;
363 
364     /**
365      * @param {!Node} node
366      * @return {!Node}
367      */
368     function firstChild(node) {
369         while (node.firstChild !== null && isGroupingElement(node)) {
370             node = node.firstChild;
371         }
372         return node;
373     }
374     this.firstChild = firstChild;
375     /**
376      * @param {!Node} node
377      * @return {!Node}
378      */
379     function lastChild(node) {
380         while (node.lastChild !== null && isGroupingElement(node)) {
381             node = node.lastChild;
382         }
383         return node;
384     }
385     this.lastChild = lastChild;
386     /**
387      * @param {!Node} node
388      * @return {?Node}
389      */
390     function previousNode(node) {
391         while (!isParagraph(node) && node.previousSibling === null) {
392             node = /**@type{!Node}*/(node.parentNode);
393         }
394         return isParagraph(node) ? null : lastChild(/**@type{!Node}*/(node.previousSibling));
395     }
396     this.previousNode = previousNode;
397     /**
398      * @param {!Node} node
399      * @return {?Node}
400      */
401     function nextNode(node) {
402         while (!isParagraph(node) && node.nextSibling === null) {
403             node = /**@type{!Node}*/(node.parentNode);
404         }
405         return isParagraph(node) ? null : firstChild(/**@type{!Node}*/(node.nextSibling));
406     }
407     this.nextNode = nextNode;
408 
409     /**
410      * Walk to the left along the DOM and return true if the first thing
411      * encountered is either a non-whitespace text character or a non-space
412      * character element (i.e., any character element other than <text:s/>).
413      * Walking goes through grouping elements.
414      * @param {?Node} node the first node to scan
415      * @return {!boolean}
416      */
417     function scanLeftForNonSpace(node) {
418         var r = false,
419             text;
420         while (node) {
421             if (node.nodeType === Node.TEXT_NODE) {
422                 text = /**@type{!Text}*/(node);
423                 if (text.length === 0) {
424                     node = previousNode(text);
425                 } else {
426                     return !isODFWhitespace(
427                         text.data.substr(text.length - 1, 1)
428                     );
429                 }
430             } else if (isAnchoredAsCharacterElement(node)) {
431                 r = isSpaceElement(node) === false;
432                 node = null;
433             } else {
434                 node = previousNode(node);
435             }
436         }
437         return r;
438     }
439     this.scanLeftForNonSpace = scanLeftForNonSpace;
440     /**
441      * Walk to the left along the DOM and return the type of the first
442      * thing encountered.
443      * 0 none of the below
444      * 1 non-whitespace character or a character element
445      * 2 whitespace character that is preceded by a non-whitespace character
446      *   or a character element
447      *
448      * @param {!Node} node the first node to scan
449      * @return {!number}
450      */
451     function lookLeftForCharacter(node) {
452         var text, r = 0, tl = 0;
453         if (node.nodeType === Node.TEXT_NODE) {
454             tl = /**@type{!Text}*/(node).length;
455         }
456         if (tl > 0) {
457             text = /**@type{!Text}*/(node).data;
458             if (!isODFWhitespace(text.substr(tl - 1, 1))) {
459                 r = 1; // character found
460             } else if (tl === 1) {
461                 r = scanLeftForNonSpace(previousNode(node)) ? 2 : 0;
462             } else {
463                 r = isODFWhitespace(text.substr(tl - 2, 1)) ? 0 : 2;
464             }
465         } else if (isAnchoredAsCharacterElement(node)) {
466             r = 1;
467         }
468         return r;
469     }
470     this.lookLeftForCharacter = lookLeftForCharacter;
471     /**
472      * Look to the right along the DOM and return true if the first thing
473      * encountered is either a non-whitespace character or a character
474      * element.
475      *
476      * @param {?Node} node the first node to scan
477      * @return {!boolean}
478      */
479     function lookRightForCharacter(node) {
480         var r = false,
481             l = 0;
482         if (node && node.nodeType === Node.TEXT_NODE) {
483             l = /**@type{!Text}*/(node).length;
484         }
485         if (l > 0) {
486             r = !isODFWhitespace(/**@type{!Text}*/(node).data.substr(0, 1));
487         } else if (isAnchoredAsCharacterElement(node)) {
488             r = true;
489         }
490         return r;
491     }
492     this.lookRightForCharacter = lookRightForCharacter;
493     /**
494      * Walk to the left along the DOM and return true if either a
495      * non-whitespace character or a character element is encountered.
496      *
497      * @param {?Node} node the first node to scan
498      * @return {!boolean}
499      */
500     function scanLeftForAnyCharacter(node) {
501         var r = false, l;
502         node = node && lastChild(node);
503         while (node) {
504             if (node.nodeType === Node.TEXT_NODE) {
505                 l = /**@type{!Text}*/(node).length;
506             } else {
507                 l = 0;
508             }
509             if (l > 0 && !isODFWhitespace(/**@type{!Text}*/(node).data)) {
510                 r = true;
511                 break;
512             }
513             if (isAnchoredAsCharacterElement(node)) {
514                 r = true;
515                 break;
516             }
517             node = previousNode(node);
518         }
519         return r;
520     }
521     this.scanLeftForAnyCharacter = scanLeftForAnyCharacter;
522     /**
523      * Walk to the right along the DOM and return true if either a
524      * non-whitespace character or a character element is encountered.
525      *
526      * @param {?Node} node the first node to scan
527      * @return {!boolean}
528      */
529     function scanRightForAnyCharacter(node) {
530         var r = false, l;
531         node = node && firstChild(node);
532         while (node) {
533             if (node.nodeType === Node.TEXT_NODE) {
534                 l = /**@type{!Text}*/(node).length;
535             } else {
536                 l = 0;
537             }
538             if (l > 0 && !isODFWhitespace(/**@type{!Text}*/(node).data)) {
539                 r = true;
540                 break;
541             }
542             if (isAnchoredAsCharacterElement(node)) {
543                 r = true;
544                 break;
545             }
546             node = nextNode(node);
547         }
548         return r;
549     }
550     this.scanRightForAnyCharacter = scanRightForAnyCharacter;
551 
552     /**
553      * check if the node is part of the trailing whitespace
554      * @param {!Text} textnode
555      * @param {!number} offset
556      * @return {!boolean}
557      */
558     function isTrailingWhitespace(textnode, offset) {
559         if (!isODFWhitespace(textnode.data.substr(offset))) {
560             return false;
561         }
562         return !scanRightForAnyCharacter(nextNode(textnode));
563     }
564     this.isTrailingWhitespace = isTrailingWhitespace;
565 
566     /**
567      * Takes a textNode and an offset, and returns true if the character
568      * at that offset is a significant whitespace.
569      *
570      * Significant whitespace is defined as:
571      * - Not part of the leading whitespace block in a paragraph
572      * - Not part of the trailing whitespace block in a paragraph
573      * - The first whitespace character after a text node or character
574      *
575      * All other whitespace elements are considered insignificant
576      * @param {!Text} textNode
577      * @param {!number} offset
578      * @return {!boolean}
579      */
580     function isSignificantWhitespace(textNode, offset) {
581         var text = textNode.data,
582             result;
583 
584         if (!isODFWhitespace(text[offset])) {
585             // Character is not whitespace
586             return false;
587         }
588 
589         if (isAnchoredAsCharacterElement(textNode.parentNode)) {
590             // Parent is a character element, and therefore does not actually contain text
591             // This prevents a space element from being upgraded again
592             return false;
593         }
594 
595         if (offset > 0) {
596             if (!isODFWhitespace(text[offset - 1])) {
597                 // First whitespace after a character is significant
598                 result = true;
599             }
600         } else if (scanLeftForNonSpace(previousNode(textNode))) {
601             // If the first character found scanning to the left is non-whitespace, this might still be significant
602             result = true;
603         }
604 
605         if (result === true) {
606             return isTrailingWhitespace(textNode, offset)
607                 ? false : true;
608         }
609         return false;
610     }
611     this.isSignificantWhitespace = isSignificantWhitespace;
612 
613     /**
614      * Returns true if the supplied node is a downgradeable space element.
615      * As per http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#element-text_s
616      * a downgradeable whitespace element is a space element that is immediately preceded by something other than a space
617      * and has at least one non-space character after it
618      * @param {!Node} node
619      * @return {!boolean}
620      */
621     this.isDowngradableSpaceElement = function(node) {
622         if (isSpaceElement(node)) {
623             return scanLeftForNonSpace(previousNode(node)) && scanRightForAnyCharacter(nextNode(node));
624         }
625         return false;
626     };
627 
628     /**
629      * Returns the length split as value and unit, from an ODF attribute
630      * @param {?string|undefined} length
631      * @return {?{value:!number,unit:!string}}
632      */
633     function parseLength(length) {
634         var re = /(-?[0-9]*[0-9][0-9]*(\.[0-9]*)?|0+\.[0-9]*[1-9][0-9]*|\.[0-9]*[1-9][0-9]*)((cm)|(mm)|(in)|(pt)|(pc)|(px)|(%))/,
635             m = re.exec(length);
636         if (!m) {
637             return null;
638         }
639         return {value: parseFloat(m[1]), unit: m[3]};
640     }
641     this.parseLength = parseLength;
642 
643     /**
644      * Returns the value and unit of the length, if it is positive ( > 0)
645      * @param {?string|undefined} length
646      * @return {?{value:!number,unit:!string}}
647      */
648     function parsePositiveLength(length) {
649         var result = parseLength(length);
650         if (result && (result.value <= 0 || result.unit === '%')) {
651             return null;
652         }
653         return result;
654     }
655 
656     /**
657      * Returns the value and unit of the length, if it is non-negative ( >= 0)
658      * @param {?string|undefined} length
659      * @return {?{value:!number,unit:!string}}
660      */
661     function parseNonNegativeLength(length) {
662         var result = parseLength(length);
663         if (result && (result.value < 0 || result.unit === '%')) {
664             return null;
665         }
666         return result;
667     }
668     this.parseNonNegativeLength = parseNonNegativeLength;
669 
670     /**
671      * Returns the value and unit(%) of the length, if it is specified in %age
672      * @param {?string|undefined} length
673      * @return {?{value:!number,unit:!string}}
674      */
675     function parsePercentage(length) {
676         var result = parseLength(length);
677         if (result && (result.unit !== '%')) {
678             return null;
679         }
680         return result;
681     }
682 
683     /**
684      * Returns the value and unit of the font size, in conformance with fo:font-size
685      * constraints
686      * @param {?string|undefined} fontSize
687      * @return {?{value:!number,unit:!string}}
688      */
689     function parseFoFontSize(fontSize) {
690         return parsePositiveLength(fontSize) || parsePercentage(fontSize);
691     }
692     this.parseFoFontSize = parseFoFontSize;
693 
694     /**
695      * Returns the value and unit of the line height, in conformance with fo:line-height
696      * constraints
697      * @param {?string|undefined} lineHeight
698      * @return {?{value:!number,unit:!string}}
699      */
700     function parseFoLineHeight(lineHeight) {
701         return parseNonNegativeLength(lineHeight) || parsePercentage(lineHeight);
702     }
703     this.parseFoLineHeight = parseFoLineHeight;
704 
705     /**
706      * Adapted from instructions on how to generate plain text from an ODT document.
707      * See algorithm at http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1415196_253892949
708      * @param {!Node} node
709      * @return {boolean}
710      */
711     function isTextContentContainingNode(node) {
712         switch (node.namespaceURI) {
713             // Namespace skips
714         case odf.Namespaces.drawns:
715         case odf.Namespaces.svgns:
716         case odf.Namespaces.dr3dns:
717             return false;
718         case odf.Namespaces.textns:
719             // Specific node type skips
720             //noinspection FallthroughInSwitchStatementJS
721             switch (node.localName) {
722             case 'note-body':
723             case 'ruby-text':
724                 return false;
725             }
726             break;
727         case odf.Namespaces.officens:
728             // Specific node type skips
729             //noinspection FallthroughInSwitchStatementJS
730             switch (node.localName) {
731             case 'annotation':
732             case 'binary-data':
733             case 'event-listeners':
734                 return false;
735             }
736             break;
737         default:
738             // Skip webodf edit markers or cursor information
739             switch (node.localName) {
740             case 'cursor':
741             case 'editinfo':
742                 return false;
743             }
744             break;
745         }
746         return true;
747     }
748     this.isTextContentContainingNode = isTextContentContainingNode;
749 
750     /**
751      * Returns true if the text node is within a paragraph and contains either non-whitespace characters, or
752      * significant whitespace characters (as defined by the ODF standards).
753      * WARNING this method is likely to be quite slow, so should be used as little as possible
754      * @param {!Text} textNode
755      * @return {!boolean}
756      */
757     function isSignificantTextContent(textNode) {
758         return Boolean(getParagraphElement(textNode)
759             && (!isODFWhitespace(textNode.textContent) || isSignificantWhitespace(textNode, 0)));
760     }
761 
762     /**
763      * Remove any nodes that aren't fully contained within the supplied range. This function assumes
764      * the nodes appear in document order.
765      * @param {!Range} range
766      * @param {!Array.<!Node>} nodes
767      * @return {undefined}
768      */
769     function removePartiallyContainedNodes(range, nodes) {
770         while (nodes.length > 0 && !domUtils.rangeContainsNode(range, /**@type{!Node}*/(nodes[0]))) {
771             nodes.shift();
772         }
773         while (nodes.length > 0 && !domUtils.rangeContainsNode(range, /**@type{!Node}*/(nodes[nodes.length - 1]))) {
774             nodes.pop();
775         }
776     }
777 
778     /**
779      * Returns a array of text nodes considered to be part of the supplied range.
780      * This will exclude elements that are not part of the ODT main text body,
781      * as well as insignificant whitespace text nodes.
782      * @param {!Range} range    Range to search for nodes within
783      * @param {boolean} includePartial Include partially intersecting text nodes
784      *                                 in the result.
785      * @return {!Array.<!Node>}
786      */
787     function getTextNodes(range, includePartial) {
788         var textNodes;
789 
790         /**
791          * @param {!Node} node
792          * @return {number}
793          */
794         function nodeFilter(node) {
795             var result = NodeFilter.FILTER_REJECT;
796             if (node.nodeType === Node.TEXT_NODE) {
797                 if (isSignificantTextContent(/**@type{!Text}*/(node))) {
798                     result = NodeFilter.FILTER_ACCEPT;
799                 }
800             } else if (isTextContentContainingNode(node)) {
801                 result = NodeFilter.FILTER_SKIP;
802             }
803             return result;
804         }
805 
806         /*jslint bitwise:true*/
807         textNodes = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT|NodeFilter.SHOW_TEXT);
808         /*jslint bitwise:false*/
809         if (!includePartial) {
810             removePartiallyContainedNodes(range, textNodes);
811         }
812 
813         return textNodes;
814     }
815     this.getTextNodes = getTextNodes;
816 
817     /**
818      * Get all character elements and text nodes fully contained within the
819      * supplied range in document order.
820      *
821      * For example, given the following fragment, with the range starting at b,
822      * and ending at c:
823      *      <text:p>ab<text:s/>cd</text:p>
824      * this function would return the following array:
825      *      ["b", text:s, "c"]
826      * @param {!Range} range
827      * @param {!boolean} includePartial Include partially intersecting text &
828      *                         character nodes in the result.
829      * @param {!boolean} includeInsignificantWhitespace Include whitespace only
830      *                         nodes that are not considered significant text
831      *                         content. This includes whitespace only elements
832      *                         used in pretty-formatted xml as LibreOffice
833      *                         produces in flat ODT files.
834      * @return {!Array.<!Element|!Text>}
835      */
836     function getTextElements(range, includePartial, includeInsignificantWhitespace) {
837         var elements;
838 
839         /**
840          * @param {!Node} node
841          * @return {number}
842          */
843         function nodeFilter(node) {
844             var result = NodeFilter.FILTER_REJECT;
845             // do not return anything inside an character element or an inline root such as an annotation
846             if (isCharacterElement(node.parentNode) || isInlineRoot(node)) {
847                 result = NodeFilter.FILTER_REJECT;
848             } else if (node.nodeType === Node.TEXT_NODE) {
849                 if (includeInsignificantWhitespace || isSignificantTextContent(/**@type{!Text}*/(node))) {
850                         // Text nodes should only be returned if they are
851                         // fully contained within the range.
852                     result = NodeFilter.FILTER_ACCEPT;
853                 }
854             } else if (isAnchoredAsCharacterElement(node)) {
855                 // Character elements should only be returned if they are
856                 // fully contained within the range.
857                 result =  NodeFilter.FILTER_ACCEPT;
858             } else if (isTextContentContainingNode(node) || isGroupingElement(node)) {
859                 result =  NodeFilter.FILTER_SKIP;
860             }
861             return result;
862         }
863 
864         /*jslint bitwise:true*/
865         elements = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT);
866         /*jslint bitwise:false*/
867         if (!includePartial) {
868             removePartiallyContainedNodes(range, elements);
869         }
870 
871         return elements;
872     }
873     this.getTextElements = getTextElements;
874 
875     /**
876      * Crawl parent nodes starting at the startContainer until a matching node is found,
877      * or the first inline root is met. If a node is accepted by the supplied filter, and is
878      * not already the first element in the elements array, this node is prepended to the start
879      * of the elements array.
880      *
881      * @param {!Node} startContainer Container to start search from (inclusive)
882      * @param {!Array.<!Node>} elements Existing elements already discovered
883      * @param {!function(!Node):!boolean} filter
884      */
885     function prependParentContainers(startContainer, elements, filter) {
886         var container = startContainer;
887         while (container) {
888             if (filter(container)) {
889                 if (elements[0] !== container) {
890                     elements.unshift(container);
891                 }
892                 break;
893             }
894             if (isInlineRoot(container)) {
895                 break;
896             }
897             container = container.parentNode;
898         }
899     }
900 
901     /**
902      * Get all paragraph elements that intersect the supplied range in document
903      * order.
904      *
905      * For example, given the following fragment, with the range starting at b,
906      * and ending at c:
907      *      <text:p id="A">ab</text:p><text:p id="B"><text:s/>cd</text:p>
908      * this function would return the following array:
909      *      [text:p{id="A"}, text:p{id="B"}]
910      * @param {!Range} range
911      * @return {!Array.<!Element>}
912      */
913     this.getParagraphElements = function (range) {
914         var elements;
915         /**
916          * @param {!Node} node
917          * @return {number}
918          */
919         function nodeFilter(node) {
920             var result = NodeFilter.FILTER_REJECT;
921             if (isParagraph(node)) {
922                 result = NodeFilter.FILTER_ACCEPT;
923             } else if (isTextContentContainingNode(node) || isGroupingElement(node)) {
924                 result = NodeFilter.FILTER_SKIP;
925             }
926             return result;
927         }
928 
929         elements = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT);
930         // getNodesInRange will only return nodes it enters during the iteration.
931         // However, we desire all paragraph nodes either contained OR containing this range,
932         // so we crawl the parentNodes of the start container until a root is found.
933 
934         // Note, this isn't necessary for the end container because iteration crosses the
935         // node boundary when entering towards the end container, meaning all paragraphs in
936         // the end container's parentNodes will be reported by getNodesInRange.
937         prependParentContainers(/**@type{!Node}*/(range.startContainer), elements, isParagraph);
938         return elements;
939     };
940 
941     /**
942      * Get all image elements that fully contained within the supplied range in
943      * document order.
944      * @param {!Range} range
945      * @return {!Array.<Node>}
946      */
947     this.getImageElements = function (range) {
948         var elements;
949 
950         /**
951          * @param {!Node} node
952          * @return {number}
953          */
954         function nodeFilter(node) {
955             var result = NodeFilter.FILTER_SKIP;
956             if (isImage(node)) {
957                 result = NodeFilter.FILTER_ACCEPT;
958             }
959             return result;
960         }
961 
962         elements = domUtils.getNodesInRange(range, nodeFilter, NodeFilter.SHOW_ELEMENT);
963         // See description in getParagraphElements as to why this is necessary
964         // Short summary: want to include images that completely contain this range
965         prependParentContainers(/**@type{!Node}*/(range.startContainer), elements, isImage);
966         return elements;
967     };
968 
969     /**
970      * Returns the node right after the given point.
971      * @param {!Node} container
972      * @param {!number} offset
973      * @return {!Node}
974      */
975     function getRightNode(container, offset) {
976         var node = container;
977         if (offset < node.childNodes.length - 1) {
978             node = /** @type {!Node} */(node.childNodes[offset + 1]);
979         } else {
980             while (!node.nextSibling) {
981                 node = node.parentNode;
982             }
983             node = node.nextSibling;
984         }
985         while (node.firstChild) {
986             node = node.firstChild;
987         }
988         return node;
989     }
990 
991     /**
992      * Get all hyperlink elements that intersect the supplied range in document order
993      *
994      * For example, given the following fragment, with the range starting at b, and ending at c:
995      *      <text:a xlink:href="google">ab</text:a><text:a xlink:href="apple">cd</text:a>
996      * this function would return the following array:
997      *      [text:a{xlink:href="google"}, text:a{xlink:href="apple"}]
998      * @param {!Range} range
999      * @return {!Array.<Node>}
1000      */
1001     this.getHyperlinkElements = function (range) {
1002         var links = [],
1003             newRange = /** @type {!Range}*/(range.cloneRange()),
1004             node,
1005             textNodes;
1006 
1007         if (range.collapsed && range.endContainer.nodeType === Node.ELEMENT_NODE) {
1008             node = getRightNode(range.endContainer, range.endOffset);
1009             if (node.nodeType === Node.TEXT_NODE) {
1010                 newRange.setEnd(node, 1);
1011             }
1012         }
1013 
1014         textNodes = getTextElements(newRange, true, false);
1015         textNodes.forEach(function (node) {
1016             var parent = node.parentNode;
1017             while (!isParagraph(parent)) {
1018                 if (isHyperlink(parent) && links.indexOf(parent) === -1) {
1019                     links.push(parent);
1020                     break;
1021                 }
1022                 parent = parent.parentNode;
1023             }
1024         });
1025         newRange.detach();
1026         return links;
1027     };
1028 
1029     /**
1030      * Normalize the font-family name as defined in
1031      * http://www.w3.org/TR/2008/REC-CSS2-20080411/fonts.html#propdef-font-family
1032      * (see there text behind: "There are two types of font family names: <family-name>")
1033      * @param {!string} fontFamilyName
1034      * @return {!string}
1035      */
1036     /*jslint regexp: true*/
1037     this.getNormalizedFontFamilyName = function(fontFamilyName) {
1038         // not quoted with either single- or double-quotes?
1039         // (\n & \r are syntactically okay as whitespaces, so need to be accepted as well)
1040         //     ^(["'])        -> match either " or ' at begin (and store match)
1041         //     (?:.|[\n\r])*? -> match non-greedy any number of any char or \r and \n
1042         //     \1$            -> match content of first match at end
1043         if (!(/^(["'])(?:.|[\n\r])*?\1$/).test(fontFamilyName)) {
1044             // remove any whitespaces at begin and end of full name (ignore internal yet)
1045             //     ^[ \t\r\n\f]*    -> match whitespace at begin
1046             //     ((?:.|[\n\r])*?) -> match non-greedy any number of any char or \r and \n (and store match)
1047             //     [ \t\r\n\f]*$    -> match whitespace at end
1048             fontFamilyName = fontFamilyName.replace(/^[ \t\r\n\f]*((?:.|[\n\r])*?)[ \t\r\n\f]*$/, "$1");
1049             // if there is any internal whitespace, reduce it to just one normal whitespace per group
1050             // and add quotes around the full name
1051             // (quotes should be only added if there is whitespace inside, as the passed fontFamilyName could
1052             // be a generic-family one, which must not be quoted)
1053             if ((/[ \t\r\n\f]/).test(fontFamilyName)) {
1054                 fontFamilyName = "'" + fontFamilyName.replace(/[ \t\r\n\f]+/g, " ") + "'";
1055             }
1056         }
1057         return fontFamilyName;
1058     };
1059     /*jslint regexp: false*/
1060 };
1061