InsertDocument optimization

Oct 7, 2014 at 12:35 PM
Edited Oct 7, 2014 at 12:38 PM
Hi!

I'm using this library to merge a large number (300+) of short documents (1-3 pages each) into one.
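Calling InsertDocument sequentially for every document takes forever to complete, so I parallelized the process (in each round, every even-indexed document is merged with the odd-indexed document that follows it, until only one document remains).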
Still it was extremely slow (around 15 minutes in my test case).
I profiled the process and found that the merge_numbering method was the offender.
I downloaded the source and started optimizing (mainly by removing some nested loops), and now it takes about 15 seconds.
I don't know the process for contributing to CodePlex or DocX so I'll just paste the code here for someone else to review / add if desirable (no attribution needed):
        /// <summary>
        /// Renumbers the numbering definitions of <paramref name="remote"/> so that their ids do not
        /// clash with ids already used by this document, rewrites the matching references in
        /// <paramref name="remote_mainDoc"/>, and appends the renumbered definitions to this
        /// document's numbering part.
        /// </summary>
        /// <param name="remote_pp">Not used by this method.</param>
        /// <param name="local_pp">Not used by this method.</param>
        /// <param name="remote_mainDoc">Main document XML of the remote document; the <c>val</c> attribute of every <c>numId</c> descendant is rewritten in place.</param>
        /// <param name="remote">Document whose numbering part is merged; its <c>abstractNum</c> and <c>num</c> elements are renumbered in place.</param>
        private void merge_numbering(PackagePart remote_pp, PackagePart local_pp, XDocument remote_mainDoc, DocX remote)
        {
            XName nameNum = XName.Get("num", DocX.w.NamespaceName);
            XName nameNumId = XName.Get("numId", DocX.w.NamespaceName);
            XName nameAbstractNumId = XName.Get("abstractNumId", DocX.w.NamespaceName);
            XName nameAbstractNum = XName.Get("abstractNum", DocX.w.NamespaceName);
            XName nameVal = XName.Get("val", DocX.w.NamespaceName);

            // Materialize once: both sequences are walked several times below.
            List<XElement> remote_abstractNums = remote.numbering.Root.Elements(nameAbstractNum).ToList();
            List<XElement> remote_nums = remote.numbering.Root.Elements(nameNum).ToList();

            // First free ids. Both documents are considered so that a new id can neither collide
            // with a definition already present locally nor with a remote id not yet renumbered.
            int nextAbstractNumId = Math.Max(
                merge_numbering_max_id(remote_abstractNums, nameAbstractNumId),
                merge_numbering_max_id(numbering.Root.Elements(nameAbstractNum), nameAbstractNumId)) + 1;
            int nextNumId = Math.Max(
                merge_numbering_max_id(remote_nums, nameNumId),
                merge_numbering_max_id(numbering.Root.Elements(nameNum), nameNumId)) + 1;

            // Index the numId references of the remote main document by their ORIGINAL value,
            // so each <num> can find its references without rescanning the whole document.
            var referencesByNumId = new Dictionary<String, List<XAttribute>>();
            foreach (XElement numIdElement in remote_mainDoc.Descendants(nameNumId))
            {
                XAttribute valAttribute = numIdElement.Attribute(nameVal);
                if (valAttribute == null)
                {
                    continue;
                }

                List<XAttribute> references;
                if (!referencesByNumId.TryGetValue(valAttribute.Value, out references))
                {
                    references = new List<XAttribute>();
                    referencesByNumId.Add(valAttribute.Value, references);
                }
                references.Add(valAttribute);
            }

            // Give every remote abstractNum a fresh id and remember old -> new.
            var newAbstractNumIds = new Dictionary<String, int>();
            foreach (XElement remote_abstractNum in remote_abstractNums)
            {
                XAttribute abstractNumId = remote_abstractNum.Attribute(nameAbstractNumId);
                if (abstractNumId != null)
                {
                    // First definition wins should the remote file contain duplicate ids.
                    if (!newAbstractNumIds.ContainsKey(abstractNumId.Value))
                    {
                        newAbstractNumIds.Add(abstractNumId.Value, nextAbstractNumId);
                    }
                    abstractNumId.SetValue(nextAbstractNumId);
                }

                nextAbstractNumId++;
            }

            // Renumber every remote <num> exactly once. Doing this once per abstractNum would
            // renumber the <num> elements repeatedly while the index above still holds the
            // original ids, leaving the main document pointing at ids that no longer exist.
            foreach (XElement remote_num in remote_nums)
            {
                XAttribute numIdAttribute = remote_num.Attribute(nameNumId);
                if (numIdAttribute != null)
                {
                    List<XAttribute> references;
                    if (referencesByNumId.TryGetValue(numIdAttribute.Value, out references))
                    {
                        foreach (XAttribute reference in references)
                        {
                            reference.SetValue(nextNumId);
                        }

                        // These references are now resolved; a later <num> carrying the same
                        // (duplicate) original id must not redirect them again.
                        referencesByNumId.Remove(numIdAttribute.Value);
                    }
                }

                remote_num.SetAttributeValue(nameNumId, nextNumId);

                // Point the <num> at the new id of the abstractNum it refers to.
                XElement abstractNumReference = remote_num.Element(nameAbstractNumId);
                if (abstractNumReference != null)
                {
                    XAttribute referencedId = abstractNumReference.Attribute(nameVal);
                    int newAbstractNumId;
                    if (referencedId != null && newAbstractNumIds.TryGetValue(referencedId.Value, out newAbstractNumId))
                    {
                        referencedId.SetValue(newAbstractNumId);
                    }
                }

                nextNumId++;
            }

            // Append after the last local element of the same kind. When the local numbering part
            // has no such element there is no anchor, and the remote elements are not added
            // (calling Last() on an empty sequence would throw).
            XElement lastLocalAbstractNum = numbering.Root.Elements(nameAbstractNum).LastOrDefault();
            if (lastLocalAbstractNum != null)
            {
                lastLocalAbstractNum.AddAfterSelf(remote_abstractNums);
            }

            XElement lastLocalNum = numbering.Root.Elements(nameNum).LastOrDefault();
            if (lastLocalNum != null)
            {
                lastLocalNum.AddAfterSelf(remote_nums);
            }
        }

        /// <summary>
        /// Returns the largest integer value of the attribute <paramref name="attributeName"/> found on
        /// <paramref name="elements"/>, or 0 when no element carries a parseable value greater than 0.
        /// </summary>
        private static int merge_numbering_max_id(IEnumerable<XElement> elements, XName attributeName)
        {
            int max = 0;
            foreach (XElement element in elements)
            {
                XAttribute attribute = element.Attribute(attributeName);
                int value;
                if (attribute != null && int.TryParse(attribute.Value, out value) && value > max)
                {
                    max = value;
                }
            }

            return max;
        }
For completeness, here's the parallelized merge method:
        /// <summary>
        /// Merges all documents in <paramref name="documents"/> into one by repeatedly merging
        /// neighbouring pairs (index 0 with 1, 2 with 3, ...) in parallel until a single document
        /// remains.
        /// </summary>
        /// <param name="documents">
        /// Documents to merge. The list is modified in place: after each round the odd-indexed
        /// entries are disposed and removed, so on return it holds exactly one element. The list
        /// must support <c>RemoveAt</c> and must not be empty (an empty list makes the final
        /// indexer access throw).
        /// </param>
        /// <returns>The remaining first element of <paramref name="documents"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="documents"/> is null.</exception>
        public DocX MergeDocuments(IList<DocX> documents)
        {
            if (documents == null)
            {
                throw new ArgumentNullException("documents");
            }

            while (documents.Count > 1)
            {
                // With an odd count the last document has no partner and waits for the next round.
                int pairCount = documents.Count >> 1;

                // The parallel section only READS the list and writes each result to its own
                // array slot; the list itself is not safe for concurrent writes.
                DocX[] merged = new DocX[pairCount];
                Parallel.For(0, pairCount, pairIndex =>
                {
                    int left = pairIndex + pairIndex;
                    merged[pairIndex] = DocAdd(documents[left], documents[left + 1]);
                });

                // Write the results back sequentially.
                for (int pairIndex = 0; pairIndex < pairCount; ++pairIndex)
                {
                    int left = pairIndex + pairIndex;
                    if (merged[pairIndex] != documents[left])
                    {
                        documents[left].Dispose();
                    }
                    documents[left] = merged[pairIndex];
                }

                // Drop the consumed right-hand documents. Each removal shifts the tail left by
                // one, so advancing the index by one lands on the next originally odd position.
                for (int ic = 1; ic < documents.Count; ++ic)
                {
                    documents[ic].Dispose();
                    documents.RemoveAt(ic);
                }
            }

            return documents[0];
        }

        /// <summary>
        /// Inserts <paramref name="add"/> into <paramref name="document"/> via
        /// <c>InsertDocument</c> and returns <paramref name="document"/>.
        /// </summary>
        /// <param name="document">Document that receives the content; this same instance is returned.</param>
        /// <param name="add">Document passed to <c>InsertDocument</c>.</param>
        /// <returns>The <paramref name="document"/> instance.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="document"/> or <paramref name="add"/> is null.</exception>
        public DocX DocAdd(DocX document, DocX add)
        {
            if (document == null)
            {
                throw new ArgumentNullException("document");
            }
            if (add == null)
            {
                throw new ArgumentNullException("add");
            }

            document.InsertDocument(add);
            return document;
        }