<?xml version="1.0" encoding="UTF-8"?>
<!-- DataCite kernel-4 metadata record for the dataset with Handle 10622/XMCZLZ. -->
<resource xmlns="http://datacite.org/schema/kernel-4"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.1/metadata.xsd">
  <identifier identifierType="Handle">10622/XMCZLZ</identifier>

  <!-- Dataset author(s). -->
  <creators>
    <creator>
      <creatorName nameType="Personal">Smit, Renate</creatorName>
      <givenName>Renate</givenName>
      <familyName>Smit</familyName>
      <!-- Attribute names are case-sensitive: the schema declares "schemeURI", not "SchemeURI". -->
      <nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID">0009-0005-1070-636X</nameIdentifier>
      <affiliation>Huygens Institute</affiliation>
    </creator>
  </creators>

  <titles>
    <title>GLOBALISE - VOC Document Segmentation Dataset</title>
  </titles>
  <publisher>IISH Data Collection</publisher>
  <publicationYear>2025</publicationYear>

  <!-- schemeURI identifies the vocabulary as a whole; valueURI identifies the individual term. -->
  <subjects>
    <subject>Arts and Humanities</subject>
    <subject subjectScheme="WikiData"
             schemeURI="https://www.wikidata.org/"
             valueURI="https://www.wikidata.org/wiki/Q166118">archive</subject>
    <subject subjectScheme="WikiData"
             schemeURI="https://www.wikidata.org/"
             valueURI="https://www.wikidata.org/wiki/Q159766">Dutch East India Company</subject>
  </subjects>

  <contributors>
    <contributor contributorType="ContactPerson">
      <contributorName nameType="Personal">Pepping, Kay</contributorName>
      <givenName>Kay</givenName>
      <familyName>Pepping</familyName>
      <!-- NOTE(review): spelled "Huygens Instituut" here but "Huygens Institute" for the creator; confirm which form is intended. -->
      <affiliation>Huygens Instituut</affiliation>
    </contributor>
  </contributors>

  <dates>
    <date dateType="Submitted">2025-09-01</date>
    <date dateType="Updated">2025-09-22</date>
  </dates>
  <resourceType resourceTypeGeneral="Dataset"/>

  <!-- 21 size entries and 21 format entries; presumably one pair per file, in matching order, sizes in bytes (TODO confirm). -->
  <sizes>
    <size>36753</size>
    <size>53985</size>
    <size>70410</size>
    <size>56757</size>
    <size>27160</size>
    <size>67907</size>
    <size>100641</size>
    <size>100304</size>
    <size>44590</size>
    <size>27698</size>
    <size>30522</size>
    <size>33946</size>
    <size>6900</size>
    <size>5980</size>
    <size>28868</size>
    <size>8322</size>
    <size>7075</size>
    <size>8912</size>
    <size>9657</size>
    <size>26081</size>
    <size>185805</size>
  </sizes>
  <formats>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>text/tab-separated-values</format>
    <format>application/pdf</format>
  </formats>
  <version>1.0</version>

  <!-- Access level followed by the licence. -->
  <rightsList>
    <rights rightsURI="info:eu-repo/semantics/openAccess"/>
    <rights rightsURI="http://creativecommons.org/licenses/by/4.0">CC BY 4.0</rights>
  </rightsList>

  <!-- The abstract is text content: its internal line break is data, so it is not re-indented. -->
  <descriptions>
    <description descriptionType="Abstract">This dataset contains detailed annotations of Dutch East India Company (VOC) archival documents based on the TANAP (Towards a New Age of Partnership) project. The dataset provides precise boundaries and classifications for documents within digitized archival volumes, serving as training data for machine learning approaches to historical document segmentation and classification. 
This work supports the broader goal of making VOC archives more accessible beyond traditional finding aids that often reflect colonial perspectives.</description>
  </descriptions>
  <geoLocations/>
</resource>