{"id":34877,"identifier":"XMCZLZ","persistentUrl":"https://hdl.handle.net/10622/XMCZLZ","protocol":"hdl","authority":"10622","publisher":"IISH Data Collection","publicationDate":"2025-09-22","storageIdentifier":"file://10622/XMCZLZ","datasetVersion":{"id":2139,"datasetId":34877,"datasetPersistentId":"hdl:10622/XMCZLZ","storageIdentifier":"file://10622/XMCZLZ","versionNumber":1,"versionMinorNumber":0,"versionState":"RELEASED","UNF":"UNF:6:6pNG8HfphDeciynT9YU+RA==","lastUpdateTime":"2025-09-22T07:20:11Z","releaseTime":"2025-09-22T07:20:11Z","createTime":"2025-09-01T14:10:23Z","license":{"name":"CC BY 4.0","uri":"http://creativecommons.org/licenses/by/4.0"},"fileAccessRequest":true,"metadataBlocks":{"citation":{"displayName":"Citation Metadata","name":"citation","fields":[{"typeName":"title","multiple":false,"typeClass":"primitive","value":"GLOBALISE - VOC Document Segmentation Dataset"},{"typeName":"author","multiple":true,"typeClass":"compound","value":[{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Smit, Renate"},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"Huygens Institute"},"authorIdentifierScheme":{"typeName":"authorIdentifierScheme","multiple":false,"typeClass":"controlledVocabulary","value":"ORCID"},"authorIdentifier":{"typeName":"authorIdentifier","multiple":false,"typeClass":"primitive","value":"https://orcid.org/0009-0005-1070-636X"}}]},{"typeName":"datasetContact","multiple":true,"typeClass":"compound","value":[{"datasetContactName":{"typeName":"datasetContactName","multiple":false,"typeClass":"primitive","value":"Pepping, Kay"},"datasetContactAffiliation":{"typeName":"datasetContactAffiliation","multiple":false,"typeClass":"primitive","value":"Huygens Instituut"},"datasetContactEmail":{"typeName":"datasetContactEmail","multiple":false,"typeClass":"primitive","value":"kay.pepping@huygens.knaw.nl"}}]},{"typeName":"dsDescription","multiple":true,"typeClass":"compound","value":[{"dsDescriptionValue":{"typeName":"dsDescriptionValue","multiple":false,"typeClass":"primitive","value":"This dataset contains detailed annotations of Dutch East India Company (VOC) archival documents based on the TANAP (Towards a New Age of Partnership) project. The dataset provides precise boundaries and classifications for documents within digitized archival volumes, serving as training data for machine learning approaches to historical document segmentation and classification. This work supports the broader goal of making VOC archives more accessible beyond traditional finding aids that often reflect colonial perspectives."}}]},{"typeName":"subject","multiple":true,"typeClass":"controlledVocabulary","value":["Arts and Humanities"]},{"typeName":"topicClassification","multiple":true,"typeClass":"compound","value":[{"topicClassValue":{"typeName":"topicClassValue","multiple":false,"typeClass":"primitive","value":"archive"},"topicClassVocab":{"typeName":"topicClassVocab","multiple":false,"typeClass":"primitive","value":"WikiData"},"topicClassVocabURI":{"typeName":"topicClassVocabURI","multiple":false,"typeClass":"primitive","value":"https://www.wikidata.org/wiki/Q166118"}},{"topicClassValue":{"typeName":"topicClassValue","multiple":false,"typeClass":"primitive","value":"Dutch East India Company"},"topicClassVocab":{"typeName":"topicClassVocab","multiple":false,"typeClass":"primitive","value":"WikiData"},"topicClassVocabURI":{"typeName":"topicClassVocabURI","multiple":false,"typeClass":"primitive","value":"https://www.wikidata.org/wiki/Q159766"}}]},{"typeName":"publication","multiple":true,"typeClass":"compound","value":[{"publicationCitation":{"typeName":"publicationCitation","multiple":false,"typeClass":"primitive","value":"Schnober, C., Smit, R., Kuruppath, M., Pepping, K., van Wissen, L., & Petram, L. (2024). Page Embeddings: Extracting and Classifying Historical Documents with Generic Vector Representations. In Proceedings of the Computational Humanities Research Conference 2024: Aarhus, Denmark, December 4-6, 2024 (Vol. 3834, pp. 999-1011). (CEUR Workshop Proceedings). https://ceur-ws.org/Vol-3834/paper73.pdf"},"publicationURL":{"typeName":"publicationURL","multiple":false,"typeClass":"primitive","value":"https://ceur-ws.org/Vol-3834/paper73.pdf"}}]},{"typeName":"depositor","multiple":false,"typeClass":"primitive","value":"Pepping, Kay"},{"typeName":"dateOfDeposit","multiple":false,"typeClass":"primitive","value":"2025-09-01"}]}},"files":[{"label":"1120 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34885,"persistentId":"","pidURL":"","filename":"1120 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":36753,"storageIdentifier":"file://199059d1449-c2da282f8312","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":35875,"originalFileName":"1120 - Document Segmentation.csv","UNF":"UNF:6:PaALHGHcxn8vZzdIoK338Q==","rootDataFileId":-1,"md5":"b1181ea0ea79e7d27009010479d572f5","checksum":{"type":"MD5","value":"b1181ea0ea79e7d27009010479d572f5"},"creationDate":"2025-09-01"}},{"label":"1267 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34878,"persistentId":"","pidURL":"","filename":"1267 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":53985,"storageIdentifier":"file://199059d14e1-b1098bc15fa1","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":52653,"originalFileName":"1267 - Document Segmentation.csv","UNF":"UNF:6:mbuRDVkEBgHKzZip9MFlbw==","rootDataFileId":-1,"md5":"ce4e898860fe5e46eed56754c337f896","checksum":{"type":"MD5","value":"ce4e898860fe5e46eed56754c337f896"},"creationDate":"2025-09-01"}},{"label":"1274 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34896,"persistentId":"","pidURL":"","filename":"1274 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":70410,"storageIdentifier":"file://199059d157a-96c24b2fbab8","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":68669,"originalFileName":"1274 - Document Segmentation.csv","UNF":"UNF:6:0CRaDzC5pJN/To8jr+/5aA==","rootDataFileId":-1,"md5":"a6eeeb67f9b1ec32bb278fcdb716e69d","checksum":{"type":"MD5","value":"a6eeeb67f9b1ec32bb278fcdb716e69d"},"creationDate":"2025-09-01"}},{"label":"1539 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34889,"persistentId":"","pidURL":"","filename":"1539 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":56757,"storageIdentifier":"file://199059d160f-fd7101c200c6","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":55376,"originalFileName":"1539 - Document Segmentation.csv","UNF":"UNF:6:EfwH8NmoW4Itv6flCBnuow==","rootDataFileId":-1,"md5":"f1fe599e66c5c3d5124f4ff0d518b170","checksum":{"type":"MD5","value":"f1fe599e66c5c3d5124f4ff0d518b170"},"creationDate":"2025-09-01"}},{"label":"1547 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34898,"persistentId":"","pidURL":"","filename":"1547 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":27160,"storageIdentifier":"file://199059d16b2-f24efa459540","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":26564,"originalFileName":"1547 - Document Segmentation.csv","UNF":"UNF:6:HY7HTyDWzOhdpb0O4TYIPQ==","rootDataFileId":-1,"md5":"8d345de07e0f69cf03a7d748512dd302","checksum":{"type":"MD5","value":"8d345de07e0f69cf03a7d748512dd302"},"creationDate":"2025-09-01"}},{"label":"1557 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34884,"persistentId":"","pidURL":"","filename":"1557 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":67907,"storageIdentifier":"file://199059d1754-af3446b71e5e","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":66223,"originalFileName":"1557 - Document Segmentation.csv","UNF":"UNF:6:AtuvqqF04kp2zCeUsHK6Mg==","rootDataFileId":-1,"md5":"12dfef38f2ea361d536bc520be6933d2","checksum":{"type":"MD5","value":"12dfef38f2ea361d536bc520be6933d2"},"creationDate":"2025-09-01"}},{"label":"2448 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34880,"persistentId":"","pidURL":"","filename":"2448 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":100641,"storageIdentifier":"file://199059d17f2-56270073beef","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":98079,"originalFileName":"2448 - Document Segmentation.csv","UNF":"UNF:6:/jd92CDEedNui+xXLnJYDw==","rootDataFileId":-1,"md5":"b4bff10d9116dbbb68ffb02c18287ecf","checksum":{"type":"MD5","value":"b4bff10d9116dbbb68ffb02c18287ecf"},"creationDate":"2025-09-01"}},{"label":"2548 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34886,"persistentId":"","pidURL":"","filename":"2548 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":100304,"storageIdentifier":"file://199059d18b4-eb851da0e7da","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":97981,"originalFileName":"2548 - Document Segmentation.csv","UNF":"UNF:6:fpxgSlWx2cLLT64IDyoP+Q==","rootDataFileId":-1,"md5":"a8669069d74e64e680f2bc0444bb884f","checksum":{"type":"MD5","value":"a8669069d74e64e680f2bc0444bb884f"},"creationDate":"2025-09-01"}},{"label":"2555 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34881,"persistentId":"","pidURL":"","filename":"2555 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":44590,"storageIdentifier":"file://199059d194d-b0e26cce0c4b","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":43579,"originalFileName":"2555 - Document Segmentation.csv","UNF":"UNF:6:r9a3wBxqJMfJx+cMI32S9g==","rootDataFileId":-1,"md5":"3666fe704414b573cfb0e156c6d675c9","checksum":{"type":"MD5","value":"3666fe704414b573cfb0e156c6d675c9"},"creationDate":"2025-09-01"}},{"label":"2775 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34882,"persistentId":"","pidURL":"","filename":"2775 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":27698,"storageIdentifier":"file://199059d19e4-90138de1243c","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":27085,"originalFileName":"2775 - Document Segmentation.csv","UNF":"UNF:6:um34/99KE/qyz4SrYcHGMg==","rootDataFileId":-1,"md5":"f7e59e985758b95c2417c392b64cd383","checksum":{"type":"MD5","value":"f7e59e985758b95c2417c392b64cd383"},"creationDate":"2025-09-01"}},{"label":"3142 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34887,"persistentId":"","pidURL":"","filename":"3142 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":30522,"storageIdentifier":"file://199059d1a77-7780f10aae2b","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":29862,"originalFileName":"3142 - Document Segmentation.csv","UNF":"UNF:6:XpiE2Tq7UgwCxZmu5aKeVQ==","rootDataFileId":-1,"md5":"83fb1658dfe4250426f1119475eed174","checksum":{"type":"MD5","value":"83fb1658dfe4250426f1119475eed174"},"creationDate":"2025-09-01"}},{"label":"3891 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34888,"persistentId":"","pidURL":"","filename":"3891 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":33946,"storageIdentifier":"file://199059d0ea1-9ba9dce8daab","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":33184,"originalFileName":"3891 - Document Segmentation.csv","UNF":"UNF:6:HiQtpXQp6H0YB6g8WYVHTQ==","rootDataFileId":-1,"md5":"d5a1d1a0aa730598210cbe5476fbcbc0","checksum":{"type":"MD5","value":"d5a1d1a0aa730598210cbe5476fbcbc0"},"creationDate":"2025-09-01"}},{"label":"7923 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34895,"persistentId":"","pidURL":"","filename":"7923 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":6900,"storageIdentifier":"file://199059d0f49-9d1ca82f452b","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":6814,"originalFileName":"7923 - Document Segmentation.csv","UNF":"UNF:6:GOJcy4ef7RrxF7beQlWc5Q==","rootDataFileId":-1,"md5":"dcd95f496b28c4aaa5838d8e35da866a","checksum":{"type":"MD5","value":"dcd95f496b28c4aaa5838d8e35da866a"},"creationDate":"2025-09-01"}},{"label":"8023 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34897,"persistentId":"","pidURL":"","filename":"8023 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":5980,"storageIdentifier":"file://199059d0ffc-4b7469883a16","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":5918,"originalFileName":"8023 - Document Segmentation.csv","UNF":"UNF:6:BJ3ukW8BMVUg5oCWmk06Ig==","rootDataFileId":-1,"md5":"1c94324b988776b31d271c7733bb58cc","checksum":{"type":"MD5","value":"1c94324b988776b31d271c7733bb58cc"},"creationDate":"2025-09-01"}},{"label":"8121 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34891,"persistentId":"","pidURL":"","filename":"8121 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":28868,"storageIdentifier":"file://199059d10a4-485546159074","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":28227,"originalFileName":"8121 - Document Segmentation.csv","UNF":"UNF:6:pJbJM07imrdbYvheLKJl2Q==","rootDataFileId":-1,"md5":"5dfb21441236e0cb58124fcce379bce0","checksum":{"type":"MD5","value":"5dfb21441236e0cb58124fcce379bce0"},"creationDate":"2025-09-01"}},{"label":"8237 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34879,"persistentId":"","pidURL":"","filename":"8237 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":8322,"storageIdentifier":"file://199059d114c-4544a8b2232c","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":8205,"originalFileName":"8237 - Document Segmentation.csv","UNF":"UNF:6:cIuvOf6jG28J3Gj4eZe/9Q==","rootDataFileId":-1,"md5":"e59e88440e419fe2a9be3fcff87f9a79","checksum":{"type":"MD5","value":"e59e88440e419fe2a9be3fcff87f9a79"},"creationDate":"2025-09-01"}},{"label":"8276 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34893,"persistentId":"","pidURL":"","filename":"8276 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":7075,"storageIdentifier":"file://199059d11e0-c00950b5e4da","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":6987,"originalFileName":"8276 - Document Segmentation.csv","UNF":"UNF:6:p0nklJvNDQsJZbEcYgTA7w==","rootDataFileId":-1,"md5":"9bea26c79d50aff5694a9d1b8c1c6eaf","checksum":{"type":"MD5","value":"9bea26c79d50aff5694a9d1b8c1c6eaf"},"creationDate":"2025-09-01"}},{"label":"8284 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34883,"persistentId":"","pidURL":"","filename":"8284 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":8912,"storageIdentifier":"file://199059d127a-19f8df2d7eb5","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":8778,"originalFileName":"8284 - Document Segmentation.csv","UNF":"UNF:6:WulLRLNd1GHvC6YXJNxpWg==","rootDataFileId":-1,"md5":"241d2672d9ffeeb7325f8aa2af8c41eb","checksum":{"type":"MD5","value":"241d2672d9ffeeb7325f8aa2af8c41eb"},"creationDate":"2025-09-01"}},{"label":"8697 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34892,"persistentId":"","pidURL":"","filename":"8697 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":9657,"storageIdentifier":"file://199059d130f-fc967121550b","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":9515,"originalFileName":"8697 - Document Segmentation.csv","UNF":"UNF:6:/P0ICj2jlWffC4BpSZTM3g==","rootDataFileId":-1,"md5":"54f8b78c68dbd25c1d74dca327baa39f","checksum":{"type":"MD5","value":"54f8b78c68dbd25c1d74dca327baa39f"},"creationDate":"2025-09-01"}},{"label":"8834 - Document Segmentation.tab","restricted":false,"version":3,"datasetVersionId":2139,"dataFile":{"id":34890,"persistentId":"","pidURL":"","filename":"8834 - Document Segmentation.tab","contentType":"text/tab-separated-values","filesize":26081,"storageIdentifier":"file://199059d13b3-4052eee50ea5","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":25516,"originalFileName":"8834 - Document Segmentation.csv","UNF":"UNF:6:hERnkAlAKFo5YYdwWoj1DQ==","rootDataFileId":-1,"md5":"f94c908b3948faefa30f88a9436b70b8","checksum":{"type":"MD5","value":"f94c908b3948faefa30f88a9436b70b8"},"creationDate":"2025-09-01"}},{"label":"README - GLOBALISE - VOC Document Segmentation Dataset.pdf","restricted":false,"version":1,"datasetVersionId":2139,"dataFile":{"id":35027,"persistentId":"","pidURL":"","filename":"README - GLOBALISE - VOC Document Segmentation Dataset.pdf","contentType":"application/pdf","filesize":185805,"storageIdentifier":"file://19957b44991-3430791f74f6","rootDataFileId":-1,"md5":"331e5b49d90eedba4e386440a093362f","checksum":{"type":"MD5","value":"331e5b49d90eedba4e386440a093362f"},"creationDate":"2025-09-17"}}],"citation":"Smit, Renate, 2025, \"GLOBALISE - VOC Document Segmentation Dataset\", https://hdl.handle.net/10622/XMCZLZ, IISH Data Collection, V1, UNF:6:6pNG8HfphDeciynT9YU+RA== [fileUNF]"}}