Open XML Markup Explorer App for Word 2013

This screen-cast introduces a small Word 2013 “app” that enables you to select content in a Word document, click a button in a task pane, and see the markup for the selected content.  Further, you can modify the markup in the task pane, click a button, and replace the selected content in the document with the markup.

This functionality is pretty valuable to a developer new to Open XML development.  You can explore the markup for a wide variety of Word document artifacts.  It is also helpful to experienced Open XML developers.  I, of course, can’t remember all of the details of all aspects of Word markup, and when writing a complex query, it is helpful to have good examples of the markup side by side with Visual Studio as I write the code.

The video demos the app, and then walks through the process of building it.  The app is included in an attached zip file, but if you want to build the app yourself (an enjoyable and instructive task), the listings follow the video.

Following is the HTML file showing the inclusion of the linq.js and ltxml.js modules, as well as the small amount of UI markup.

<!DOCTYPE html>
<html>
<head>
   
<meta charset="UTF-8" />
   
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
   
<title>WordMarkupExplorer</title>

   
<link rel="stylesheet" type="text/css" href="../Content/Office.css" />

   
<!-- App-specific CSS styles go in the following file -->
   
<link rel="stylesheet" type="text/css" href="../Content/App.css" />

   
<script src="../Scripts/jquery-1.7.1.js"></script>

   
<!-- Use the CDN reference to Office.js when deploying your app -->
   
<!--<script src="https://appsforoffice.microsoft.com/lib/1.0/hosted/office.js"></script>-->

   
<!-- Use the local script references for Office.js to enable offline debugging -->
   
<script src="../Scripts/Office/1.0/MicrosoftAjax.js"></script>
   
<script src="../Scripts/Office/1.0/office.js"></script>

   
<!-- Libraries first, then the app script: WordMarkupExplorer.js reads the global
     Ltxml object (from ltxml.js) as soon as it loads, so ltxml.js must come before it.
     linq.js is presumably a dependency of ltxml.js; confirm before reordering. -->
   
<script src="../Scripts/linq.js"></script>
   
<script src="../Scripts/ltxml.js"></script>
   
<script src="../Scripts/WordMarkupExplorer.js"></script>
</head>
<body>
   
<!-- Command buttons; WordMarkupExplorer.js attaches click handlers to them by id -->
   
<div id="Content" style="padding: 0; margin: 0;">
       
<input type="button" value="Get Markup" id="getMarkupBtn" style="padding: 0px; width: 100px;" />
       
<input type="button" value="Set Markup" id="setMarkupBtn" style="padding: 0px; width: 100px;" />
   
</div>
   
<!-- Error banner; hidden at startup and filled in and shown by the script when an operation fails -->
   
<div id="Error" style="margin: 0; padding: 0;"></div>
   
<!-- Editable markup view: "Get Markup" writes the selection's markup here, "Set Markup" reads it back -->
   
<div id="Markup" style="padding: 0; margin: 0;">
       
<textarea rows="33" cols="120" id="txMarkup" spellcheck="false" style="font: 12px courier; height: 100%; min-height: 50%; padding: 0; width: 100%; margin: 10pt 0 0 0;"></textarea>
   
</div>
</body>
</html>

Here is the listing of WordMarkupExplorer.js:

// Runs when the app is ready to start interacting with the host application.
// The UI wiring is deferred until the DOM is ready: the two buttons are bound
// to getMarkup / setMarkup and the error banner is hidden and reset.
Office.initialize = function (reason) {
    function wireUpTaskPane() {
        var errorBanner = '#Error';

        $('#getMarkupBtn').click(function () {
            getMarkup();
        });
        $('#setMarkupBtn').click(function () {
            setMarkup();
        });

        // Hide immediately (0 ms), then reset the banner to an empty paragraph.
        $(errorBanner).hide(0, function () {
            $(errorBanner).html("<p/>");
        });
    }

    $(document).ready(wireUpTaskPane);
};

// Entry points of the app. Both are declared here at file level so that the click
// handlers registered in Office.initialize can call them; the function values are
// assigned inside the immediately-invoked function that follows.
var getMarkup, setMarkup;

(function (root) {

   
// Short local names for the types exposed on the global Ltxml object
// (provided by ltxml.js, which the page loads before this script).
var XName = Ltxml.XName;
var XNamespace = Ltxml.XNamespace;
var XAttribute = Ltxml.XAttribute;
var XElement = Ltxml.XElement;
var XDocument = Ltxml.XDocument;

   
// XML namespaces that may be declared on the markup handled by this script.
// Each variable is named after the prefix under which simplifyMarkup declares it.

// Word 2010 drawing canvas and markup compatibility.
var wpc = new XNamespace("http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas");
var mc = new XNamespace("http://schemas.openxmlformats.org/markup-compatibility/2006");

// Office, relationships, math and VML.
var o = new XNamespace("urn:schemas-microsoft-com:office:office");
var r = new XNamespace("http://schemas.openxmlformats.org/officeDocument/2006/relationships");
var m = new XNamespace("http://schemas.openxmlformats.org/officeDocument/2006/math");
var v = new XNamespace("urn:schemas-microsoft-com:vml");

// Wordprocessing drawing.
var wp14 = new XNamespace("http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing");
var wp = new XNamespace("http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing");

// Core WordprocessingML and its versioned extensions.
var w10 = new XNamespace("urn:schemas-microsoft-com:office:word");
var w = new XNamespace("http://schemas.openxmlformats.org/wordprocessingml/2006/main");
var w14 = new XNamespace("http://schemas.microsoft.com/office/word/2010/wordml");
var w15 = new XNamespace("http://schemas.microsoft.com/office/word/2012/wordml");

// Word 2010 groups, ink, the 2006 wordml namespace and shapes.
var wpg = new XNamespace("http://schemas.microsoft.com/office/word/2010/wordprocessingGroup");
var wpi = new XNamespace("http://schemas.microsoft.com/office/word/2010/wordprocessingInk");
var wne = new XNamespace("http://schemas.microsoft.com/office/word/2006/wordml");
var wps = new XNamespace("http://schemas.microsoft.com/office/word/2010/wordprocessingShape");

   
// Lookup table of XName objects in the WordprocessingML namespace (w), keyed by
// local name: the rsid* names that simplifyTransform filters out, plus sectPr
// and body.
var W = {};
[
    "rsid",
    "rsidDel",
    "rsidP",
    "rsidR",
    "rsidRDefault",
    "rsidRPr",
    "rsidSect",
    "rsidTr",
    "sectPr",
    "body"
].forEach(function (localName) {
    W[localName] = new XName(w + localName);
});

   
// Recursively builds a simplified copy of an ltxml node tree.
//
// node: an ltxml node. Nodes whose nodeType is not 'Element' are returned as-is.
// Returns: null for elements that are dropped (a w:sectPr directly under w:body,
//          and any w:rsid element); otherwise a new XElement with the same name,
//          the attributes minus the w:rsid* revision attributes, and the
//          recursively transformed child nodes.
//
// Names are compared by identity; this assumes ltxml hands out one shared XName
// object per expanded name (the original code already relied on this via ===).
function simplifyTransform(node) {
    if (node.nodeType !== 'Element') {
        return node;
    }

    var element = node;
    var parent = element.parent;

    // Drop the section properties that sit directly under the body. The parent
    // is checked first so that a detached w:sectPr (no parent) is kept instead
    // of throwing on a null dereference.
    if (element.name === W.sectPr && parent && parent.name === W.body) {
        return null;
    }
    if (element.name === W.rsid) {
        return null;
    }

    var rsidAttributeNames = [
        W.rsid, W.rsidDel, W.rsidP, W.rsidR,
        W.rsidRDefault, W.rsidRPr, W.rsidSect, W.rsidTr
    ];

    // Children that transform to null are passed to the XElement constructor
    // as-is; presumably ltxml skips null content (as the original code assumed).
    return new Ltxml.XElement(element.name,
        element.attributes().where(function (a) {
            return rsidAttributeNames.indexOf(a.name) === -1;
        }),
        element.nodes().select(function (n) {
            return simplifyTransform(n);
        }));
}

   
// Find/replace rules that simplifyMarkup applies, in this order, to the
// serialized markup in order to strip the xmlns:* namespace declarations.
// The first two rules remove a declaration that ends its line (double- or
// single-quoted), together with the line break; the last two remove a
// declaration that is immediately followed by the tag's closing '>'.
var regArray = [
    { regEx: /xmlns:[^\r\n]*"(\r\n|\n|\r)/g, replaceWith: "" },
    { regEx: /xmlns:[^\r\n]*'(\r\n|\n|\r)/g, replaceWith: "" },
    { regEx: /xmlns:[^>\r\n]*">/g, replaceWith: ">" },
    { regEx: /xmlns:[^>\r\n]*'>/g, replaceWith: ">" }
];

   
// Produces the text shown in the markup textarea for a w:body element.
//
// The body is first run through simplifyTransform. All known namespace
// prefixes are then declared on the result before it is serialized with
// indentation. Finally the declarations, the w:body wrapper, one level of
// indentation and a trailing empty paragraph are stripped from the text.
function simplifyMarkup(body) {
    var cleaned = simplifyTransform(body);

    // Prefix / namespace pairs, in the order in which they are declared.
    var prefixedNamespaces = [
        ["wpc", wpc], ["mc", mc], ["o", o], ["r", r],
        ["m", m], ["v", v], ["wp14", wp14], ["wp", wp],
        ["w10", w10], ["w", w], ["w14", w14], ["w15", w15],
        ["wpg", wpg], ["wpi", wpi], ["wne", wne], ["wps", wps]
    ];
    var declarations = prefixedNamespaces.map(function (pair) {
        return new XAttribute(XNamespace.xmlns + pair[0], pair[1]);
    });
    // Single add() call with every declaration as its own argument.
    cleaned.add.apply(cleaned, declarations);

    // Strip the namespace declarations from the indented serialization.
    var withoutDeclarations = regArray.reduce(function (text, rule) {
        return text.replace(rule.regEx, rule.replaceWith);
    }, cleaned.toString(true));

    return withoutDeclarations
        // Remove the opening and closing w:body tags along with their line breaks.
        .replace(/<w:body[ ]*>(\r\n|\n|\r)/, "")
        .replace(/<\/w:body>(\r\n|\n|\r)/, "")
        // Remove one level (two spaces) of indentation; line breaks become CRLF.
        .replace(/^  /g, "")
        .replace(/(\r\n|\n|\r)  /g, "\r\n")
        // Remove an empty paragraph at the very end of the text.
        .replace(/<w:p\/>(\r\n|\n|\r)$/, "");
}

   
// Reads the current selection of the document as OOXML and displays its
// simplified markup in the txMarkup textarea.
//
// Outcomes, all reported through the UI (nothing is returned or thrown):
//  - Office reports a failure: "<name>: <message>" is written into the textarea.
//  - The returned markup cannot be parsed or has no w:body element: the textarea
//    is left untouched and the problem is shown in the #Error banner.
//  - Success: the textarea receives the simplified markup and the banner is
//    hidden and cleared.
getMarkup = function () {
    Office.context.document.getSelectedDataAsync(Office.CoercionType.Ooxml,
        function (asyncResult) {
            var t = document.getElementById('txMarkup');

            if (asyncResult.status === Office.AsyncResultStatus.Failed) {
                var error = asyncResult.error;
                t.value = error.name + ": " + error.message;
                return;
            }

            var simpString;
            try {
                var parsedResult = XElement.parse(asyncResult.value);
                var body = parsedResult.descendants(w + "body").firstOrDefault();
                if (!body) {
                    throw new Error("no w:body element found in the selection's markup");
                }
                simpString = simplifyMarkup(body);
            } catch (e) {
                // This runs inside an async callback, so an uncaught exception would
                // never reach the user. The text is escaped because parser messages
                // may quote fragments of the markup.
                var detail = String(e)
                    .replace(/&/g, "&amp;")
                    .replace(/</g, "&lt;")
                    .replace(/>/g, "&gt;");
                $('#Error').html(
                    "<p style='font-size: 12px;background-color: red; color: white;'>Error getting markup: "
                    + detail + "</p>");
                $('#Error').show(500, function () { });
                return;
            }

            t.value = simpString;
            $('#Error').hide(500, function () {
                $('#Error').html("");
            });
        });
};

   
// Wraps a markup fragment in a w:body element that declares every namespace
// prefix the fragment may use, so that the result can be parsed on its own.
//
// markup: the fragment to wrap; it is concatenated as-is, without validation.
// Returns: the complete '<w:body ...>' + markup + '</w:body>' string.
function fixUpMarkup(markup) {
    var openingTag = [
        '<w:body xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas"',
        ' xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
        ' xmlns:o="urn:schemas-microsoft-com:office:office"',
        ' xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"',
        ' xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"',
        ' xmlns:v="urn:schemas-microsoft-com:vml"',
        ' xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing"',
        ' xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"',
        ' xmlns:w10="urn:schemas-microsoft-com:office:word"',
        ' xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"',
        ' xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"',
        ' xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml"',
        ' xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup"',
        ' xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk"',
        ' xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml"',
        ' xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape"',
        ' mc:Ignorable="w14 w15 wp14">'
    ].join("");

    return openingTag + markup + "</w:body>";
}

   
// Replaces the current selection of the document with the markup that is in
// the txMarkup textarea.
//
// The selection is first read as OOXML to obtain a complete package; its
// w:body is replaced by the textarea markup (wrapped by fixUpMarkup) and the
// package is written back over the selection. On success getMarkup() is
// called to display the markup as Word stored it. Every failure is reported
// in the #Error banner; nothing is returned or thrown.
setMarkup = function () {
    // Shows label + detail in the red error banner. The detail is HTML-escaped
    // because parser messages may quote fragments of the markup.
    function showError(label, detail) {
        var safeDetail = String(detail)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;");
        $('#Error').html(
            "<p style='font-size: 12px;background-color: red; color: white;'>"
            + label + safeDetail + "</p>");
        $('#Error').show(500, function () { });
    }

    Office.context.document.getSelectedDataAsync(Office.CoercionType.Ooxml,
        function (asyncResult) {
            if (asyncResult.status === Office.AsyncResultStatus.Failed) {
                showError("Error setting markup: ",
                    asyncResult.error.name + ": " + asyncResult.error.message);
                return;
            }

            var t = document.getElementById('txMarkup');
            var parsedResult;
            try {
                // Parsing the package is inside the try as well, so that a failure
                // here is reported instead of escaping from the async callback.
                parsedResult = XDocument.parse(asyncResult.value);
                var body = parsedResult.descendants(w + "body").firstOrDefault();
                if (!body) {
                    throw new Error("no w:body element found in the selection's markup");
                }
                body.replaceWith(XElement.parse(fixUpMarkup(t.value)));
            } catch (e) {
                showError("XML Parsing Error: ", e.toString());
                return;
            }

            $('#Error').hide(500, function () {
                $('#Error').html("");
            });

            try {
                Office.context.document.setSelectedDataAsync(
                    parsedResult.toString(false),
                    { coercionType: Office.CoercionType.Ooxml },
                    function (setResult) {
                        if (setResult.status === Office.AsyncResultStatus.Failed) {
                            showError("Error setting markup: ",
                                setResult.error.name + ": " + setResult.error.message);
                        } else {
                            // Get the markup back out of Word so that the textarea
                            // shows any adjustments that Word made to it.
                            getMarkup();
                        }
                    });
            } catch (e) {
                // A synchronous failure of the write is not a parsing problem,
                // so it gets the "setting markup" label.
                showError("Error setting markup: ", e.toString());
            }
        });
};
})(this);

Download – Example Code