{"dcterms:modified":"2025-09-22","dcterms:creator":"IISH Data Collection","@type":"ore:ResourceMap","@id":"https://datasets.iisg.amsterdam/api/datasets/export?exporter=OAI_ORE&persistentId=https://hdl.handle.net/10622/XMCZLZ","ore:describes":{"citation:topicClassification":[{"citation:topicClassValue":"archive","citation:topicClassVocab":"WikiData","citation:topicClassVocabURI":"https://www.wikidata.org/wiki/Q166118"},{"citation:topicClassValue":"Dutch East India Company","citation:topicClassVocab":"WikiData","citation:topicClassVocabURI":"https://www.wikidata.org/wiki/Q159766"}],"citation:depositor":"Pepping, Kay","dateOfDeposit":"2025-09-01","title":"GLOBALISE - VOC Document Segmentation Dataset","subject":"Arts and Humanities","publication":{"publicationCitation":"Schnober, C., Smit, R., Kuruppath, M., Pepping, K., van Wissen, L., & Petram, L. (2024). Page Embeddings: Extracting and Classifying Historical Documents with Generic Vector Representations. In Proceedings of the Computational Humanities Research Conference 2024: Aarhus, Denmark, December 4-6, 2024 (Vol. 3834, pp. 999-1011). (CEUR Workshop Proceedings). https://ceur-ws.org/Vol-3834/paper73.pdf","publicationURL":"https://ceur-ws.org/Vol-3834/paper73.pdf"},"citation:datasetContact":{"citation:datasetContactName":"Pepping, Kay","citation:datasetContactAffiliation":"Huygens Instituut","citation:datasetContactEmail":"kay.pepping@huygens.knaw.nl"},"author":{"citation:authorName":"Smit, Renate","citation:authorAffiliation":"Huygens Institute","authorIdentifierScheme":"ORCID","authorIdentifier":"https://orcid.org/0009-0005-1070-636X"},"citation:dsDescription":{"citation:dsDescriptionValue":"This dataset contains detailed annotations of Dutch East India Company (VOC) archival documents based on the TANAP (Towards a New Age of Partnership) project. The dataset provides precise boundaries and classifications for documents within digitized archival volumes, serving as training data for machine learning approaches to historical document segmentation and classification. This work supports the broader goal of making VOC archives more accessible beyond traditional finding aids that often reflect colonial perspectives."},"@id":"https://hdl.handle.net/10622/XMCZLZ","@type":["ore:Aggregation","schema:Dataset"],"schema:version":"1.0","schema:name":"GLOBALISE - VOC Document Segmentation Dataset","schema:dateModified":"2025-09-22 09:20:11.262","schema:datePublished":"2025-09-22","schema:license":"http://creativecommons.org/licenses/by/4.0","dvcore:fileTermsOfAccess":{"dvcore:fileRequestAccess":true},"schema:includedInDataCatalog":"IISH Data Collection","schema:isPartOf":{"schema:name":"GLOBALISE","@id":"https://datasets.iisg.amsterdam/dataverse/globalise","schema:description":"This dataverse hosts all datasets collected and curated as part of the GLOBALISE project (2021-2026).","schema:isPartOf":{"schema:name":"IISH Data Collection","@id":"https://datasets.iisg.amsterdam/dataverse/IISH","schema:description":"The IISH Data Collection contains micro-, meso-, and macro-level datasets on demographic, social and economic history. To access restricted datasets, please create an account with an institutional mail address.\n\nYou can also <b>query our Linked Open Datasets via <a href=\"https://druid.datalegend.net\">Druid</a></b>.<br><br>\n\nFor questions regarding the use of Dataverse, consult the <a href=\"http://dataverse-guides.readthedocs.org/en/latest/user/dataverse-management.html\">documentation</a>."}},"ore:aggregates":[{"schema:name":"1120 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34885","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34885?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":35875,"dvcore:storageIdentifier":"file://199059d1449-c2da282f8312","dvcore:currentIngestedName":"1120 - Document Segmentation.tab","dvcore:UNF":"UNF:6:PaALHGHcxn8vZzdIoK338Q==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"b1181ea0ea79e7d27009010479d572f5"}},{"schema:name":"1267 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34878","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34878?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":52653,"dvcore:storageIdentifier":"file://199059d14e1-b1098bc15fa1","dvcore:currentIngestedName":"1267 - Document Segmentation.tab","dvcore:UNF":"UNF:6:mbuRDVkEBgHKzZip9MFlbw==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"ce4e898860fe5e46eed56754c337f896"}},{"schema:name":"1274 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34896","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34896?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":68669,"dvcore:storageIdentifier":"file://199059d157a-96c24b2fbab8","dvcore:currentIngestedName":"1274 - Document Segmentation.tab","dvcore:UNF":"UNF:6:0CRaDzC5pJN/To8jr+/5aA==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"a6eeeb67f9b1ec32bb278fcdb716e69d"}},{"schema:name":"1539 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34889","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34889?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":55376,"dvcore:storageIdentifier":"file://199059d160f-fd7101c200c6","dvcore:currentIngestedName":"1539 - Document Segmentation.tab","dvcore:UNF":"UNF:6:EfwH8NmoW4Itv6flCBnuow==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"f1fe599e66c5c3d5124f4ff0d518b170"}},{"schema:name":"1547 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34898","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34898?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":26564,"dvcore:storageIdentifier":"file://199059d16b2-f24efa459540","dvcore:currentIngestedName":"1547 - Document Segmentation.tab","dvcore:UNF":"UNF:6:HY7HTyDWzOhdpb0O4TYIPQ==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"8d345de07e0f69cf03a7d748512dd302"}},{"schema:name":"1557 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34884","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34884?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":66223,"dvcore:storageIdentifier":"file://199059d1754-af3446b71e5e","dvcore:currentIngestedName":"1557 - Document Segmentation.tab","dvcore:UNF":"UNF:6:AtuvqqF04kp2zCeUsHK6Mg==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"12dfef38f2ea361d536bc520be6933d2"}},{"schema:name":"2448 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34880","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34880?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":98079,"dvcore:storageIdentifier":"file://199059d17f2-56270073beef","dvcore:currentIngestedName":"2448 - Document Segmentation.tab","dvcore:UNF":"UNF:6:/jd92CDEedNui+xXLnJYDw==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"b4bff10d9116dbbb68ffb02c18287ecf"}},{"schema:name":"2548 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34886","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34886?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":97981,"dvcore:storageIdentifier":"file://199059d18b4-eb851da0e7da","dvcore:currentIngestedName":"2548 - Document Segmentation.tab","dvcore:UNF":"UNF:6:fpxgSlWx2cLLT64IDyoP+Q==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"a8669069d74e64e680f2bc0444bb884f"}},{"schema:name":"2555 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34881","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34881?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":43579,"dvcore:storageIdentifier":"file://199059d194d-b0e26cce0c4b","dvcore:currentIngestedName":"2555 - Document Segmentation.tab","dvcore:UNF":"UNF:6:r9a3wBxqJMfJx+cMI32S9g==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"3666fe704414b573cfb0e156c6d675c9"}},{"schema:name":"2775 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34882","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34882?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":27085,"dvcore:storageIdentifier":"file://199059d19e4-90138de1243c","dvcore:currentIngestedName":"2775 - Document Segmentation.tab","dvcore:UNF":"UNF:6:um34/99KE/qyz4SrYcHGMg==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"f7e59e985758b95c2417c392b64cd383"}},{"schema:name":"3142 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34887","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34887?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":29862,"dvcore:storageIdentifier":"file://199059d1a77-7780f10aae2b","dvcore:currentIngestedName":"3142 - Document Segmentation.tab","dvcore:UNF":"UNF:6:XpiE2Tq7UgwCxZmu5aKeVQ==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"83fb1658dfe4250426f1119475eed174"}},{"schema:name":"3891 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34888","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34888?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":33184,"dvcore:storageIdentifier":"file://199059d0ea1-9ba9dce8daab","dvcore:currentIngestedName":"3891 - Document Segmentation.tab","dvcore:UNF":"UNF:6:HiQtpXQp6H0YB6g8WYVHTQ==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"d5a1d1a0aa730598210cbe5476fbcbc0"}},{"schema:name":"7923 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34895","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34895?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":6814,"dvcore:storageIdentifier":"file://199059d0f49-9d1ca82f452b","dvcore:currentIngestedName":"7923 - Document Segmentation.tab","dvcore:UNF":"UNF:6:GOJcy4ef7RrxF7beQlWc5Q==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"dcd95f496b28c4aaa5838d8e35da866a"}},{"schema:name":"8023 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34897","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34897?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":5918,"dvcore:storageIdentifier":"file://199059d0ffc-4b7469883a16","dvcore:currentIngestedName":"8023 - Document Segmentation.tab","dvcore:UNF":"UNF:6:BJ3ukW8BMVUg5oCWmk06Ig==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"1c94324b988776b31d271c7733bb58cc"}},{"schema:name":"8121 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34891","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34891?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":28227,"dvcore:storageIdentifier":"file://199059d10a4-485546159074","dvcore:currentIngestedName":"8121 - Document Segmentation.tab","dvcore:UNF":"UNF:6:pJbJM07imrdbYvheLKJl2Q==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"5dfb21441236e0cb58124fcce379bce0"}},{"schema:name":"8237 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34879","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34879?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":8205,"dvcore:storageIdentifier":"file://199059d114c-4544a8b2232c","dvcore:currentIngestedName":"8237 - Document Segmentation.tab","dvcore:UNF":"UNF:6:cIuvOf6jG28J3Gj4eZe/9Q==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"e59e88440e419fe2a9be3fcff87f9a79"}},{"schema:name":"8276 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34893","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34893?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":6987,"dvcore:storageIdentifier":"file://199059d11e0-c00950b5e4da","dvcore:currentIngestedName":"8276 - Document Segmentation.tab","dvcore:UNF":"UNF:6:p0nklJvNDQsJZbEcYgTA7w==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"9bea26c79d50aff5694a9d1b8c1c6eaf"}},{"schema:name":"8284 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34883","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34883?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":8778,"dvcore:storageIdentifier":"file://199059d127a-19f8df2d7eb5","dvcore:currentIngestedName":"8284 - Document Segmentation.tab","dvcore:UNF":"UNF:6:WulLRLNd1GHvC6YXJNxpWg==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"241d2672d9ffeeb7325f8aa2af8c41eb"}},{"schema:name":"8697 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34892","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34892?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":9515,"dvcore:storageIdentifier":"file://199059d130f-fc967121550b","dvcore:currentIngestedName":"8697 - Document Segmentation.tab","dvcore:UNF":"UNF:6:/P0ICj2jlWffC4BpSZTM3g==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"54f8b78c68dbd25c1d74dca327baa39f"}},{"schema:name":"8834 - Document Segmentation.csv","dvcore:restricted":false,"schema:version":3,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=34890","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/34890?format=original","@type":"ore:AggregatedResource","schema:fileFormat":"text/csv","dvcore:filesize":25516,"dvcore:storageIdentifier":"file://199059d13b3-4052eee50ea5","dvcore:currentIngestedName":"8834 - Document Segmentation.tab","dvcore:UNF":"UNF:6:hERnkAlAKFo5YYdwWoj1DQ==","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"f94c908b3948faefa30f88a9436b70b8"}},{"schema:name":"README - GLOBALISE - VOC Document Segmentation Dataset.pdf","dvcore:restricted":false,"schema:version":1,"dvcore:datasetVersionId":2139,"@id":"https://datasets.iisg.amsterdam/file.xhtml?fileId=35027","schema:sameAs":"https://datasets.iisg.amsterdam/api/access/datafile/35027","@type":"ore:AggregatedResource","schema:fileFormat":"application/pdf","dvcore:filesize":185805,"dvcore:storageIdentifier":"file://19957b44991-3430791f74f6","dvcore:rootDataFileId":-1,"dvcore:checksum":{"@type":"MD5","@value":"331e5b49d90eedba4e386440a093362f"}}],"schema:hasPart":["https://datasets.iisg.amsterdam/file.xhtml?fileId=34885","https://datasets.iisg.amsterdam/file.xhtml?fileId=34878","https://datasets.iisg.amsterdam/file.xhtml?fileId=34896","https://datasets.iisg.amsterdam/file.xhtml?fileId=34889","https://datasets.iisg.amsterdam/file.xhtml?fileId=34898","https://datasets.iisg.amsterdam/file.xhtml?fileId=34884","https://datasets.iisg.amsterdam/file.xhtml?fileId=34880","https://datasets.iisg.amsterdam/file.xhtml?fileId=34886","https://datasets.iisg.amsterdam/file.xhtml?fileId=34881","https://datasets.iisg.amsterdam/file.xhtml?fileId=34882","https://datasets.iisg.amsterdam/file.xhtml?fileId=34887","https://datasets.iisg.amsterdam/file.xhtml?fileId=34888","https://datasets.iisg.amsterdam/file.xhtml?fileId=34895","https://datasets.iisg.amsterdam/file.xhtml?fileId=34897","https://datasets.iisg.amsterdam/file.xhtml?fileId=34891","https://datasets.iisg.amsterdam/file.xhtml?fileId=34879","https://datasets.iisg.amsterdam/file.xhtml?fileId=34893","https://datasets.iisg.amsterdam/file.xhtml?fileId=34883","https://datasets.iisg.amsterdam/file.xhtml?fileId=34892","https://datasets.iisg.amsterdam/file.xhtml?fileId=34890","https://datasets.iisg.amsterdam/file.xhtml?fileId=35027"]},"@context":{"author":"http://purl.org/dc/terms/creator","authorIdentifier":"http://purl.org/spar/datacite/AgentIdentifier","authorIdentifierScheme":"http://purl.org/spar/datacite/AgentIdentifierScheme","citation":"https://dataverse.org/schema/citation/","dateOfDeposit":"http://purl.org/dc/terms/dateSubmitted","dcterms":"http://purl.org/dc/terms/","dvcore":"https://dataverse.org/schema/core#","ore":"http://www.openarchives.org/ore/terms/","publication":"http://purl.org/dc/terms/isReferencedBy","publicationCitation":"http://purl.org/dc/terms/bibliographicCitation","publicationURL":"https://schema.org/distribution","schema":"http://schema.org/","subject":"http://purl.org/dc/terms/subject","title":"http://purl.org/dc/terms/title"}}