JoesDataDiner · June 10, 2013 22:32
diff --git a/GetOfficeMetadata.R b/GetOfficeMetadata.R
 library(XML)
 # Read the author of an Office Open XML document (here 'test.docx').
 # A .docx file is a zip archive; this script reads docProps/core.xml from it.
 #
 # Extract into a throwaway directory instead of the working directory, so the
 # script neither leaves a stray docProps/core.xml behind nor overwrites one
 # that already exists there.
 extract_dir <- tempfile("docx_meta_")
 core_xml <- unzip("test.docx", files = "docProps/core.xml", exdir = extract_dir)
 # unzip() returns the paths it extracted; zero paths means the archive could
 # not be read or the entry is missing. Fail with a clear message here rather
 # than with an unrelated parser error further down.
 if (length(core_xml) != 1L) {
   unlink(extract_dir, recursive = TRUE)
   stop("could not extract docProps/core.xml from test.docx", call. = FALSE)
 }
 # The parsed tree is held in memory, so the extracted file can be removed as
 # soon as parsing finishes (whether it succeeds or fails).
 doc <- tryCatch(
   xmlInternalTreeParse(core_xml),
   finally = unlink(extract_dir, recursive = TRUE)
 )
 # Dublin Core namespace used by the creator element.
 ns <- c(dc = "http://purl.org/dc/elements/1.1/")
 # Look up the creator directly under the root element with an XPath query.
 creator_nodes <- getNodeSet(doc, "/*/dc:creator", namespaces = ns)
 # A document without a creator element yields NA instead of a
 # "subscript out of bounds" error.
 author <- if (length(creator_nodes) > 0L) {
   xmlValue(creator_nodes[[1]])
 } else {
   NA_character_
 }
	library(XML)
	# Read the author of an Office Open XML document (here 'test.docx').
	# A .docx file is a zip archive; this script reads docProps/core.xml from it.
	#
	# Extract into a throwaway directory instead of the working directory, so the
	# script neither leaves a stray docProps/core.xml behind nor overwrites one
	# that already exists there.
	extract_dir <- tempfile("docx_meta_")
	core_xml <- unzip("test.docx", files = "docProps/core.xml", exdir = extract_dir)
	# unzip() returns the paths it extracted; zero paths means the archive could
	# not be read or the entry is missing. Fail with a clear message here rather
	# than with an unrelated parser error further down.
	if (length(core_xml) != 1L) {
	  unlink(extract_dir, recursive = TRUE)
	  stop("could not extract docProps/core.xml from test.docx", call. = FALSE)
	}
	# The parsed tree is held in memory, so the extracted file can be removed as
	# soon as parsing finishes (whether it succeeds or fails).
	doc <- tryCatch(
	  xmlInternalTreeParse(core_xml),
	  finally = unlink(extract_dir, recursive = TRUE)
	)
	# Dublin Core namespace used by the creator element.
	ns <- c(dc = "http://purl.org/dc/elements/1.1/")
	# Look up the creator directly under the root element with an XPath query.
	creator_nodes <- getNodeSet(doc, "/*/dc:creator", namespaces = ns)
	# A document without a creator element yields NA instead of a
	# "subscript out of bounds" error.
	author <- if (length(creator_nodes) > 0L) {
	  xmlValue(creator_nodes[[1]])
	} else {
	  NA_character_
	}