{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T18:21:13Z","timestamp":1747160473224,"version":"3.40.5"},"reference-count":46,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2016,6,1]],"date-time":"2016-06-01T00:00:00Z","timestamp":1464739200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Expert Systems with Applications"],"published-print":{"date-parts":[[2016,6]]},"DOI":"10.1016\/j.eswa.2015.12.040","type":"journal-article","created":{"date-parts":[[2016,1,8]],"date-time":"2016-01-08T21:15:19Z","timestamp":1452287719000},"page":"259-275","update-policy":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":4,"special_numbering":"C","title":["Predicate enrichment of aligned XPaths for wrapper induction"],"prefix":"10.1016","volume":"51","author":[{"ORCID":"https:\/\/linproxy.fan.workers.dev:443\/https\/orcid.org\/0000-0003-4900-8276","authenticated-orcid":false,"given":"Joachim","family":"Nielandt","sequence":"first","affiliation":[]},{"given":"Antoon","family":"Bronselaer","sequence":"additional","affiliation":[]},{"given":"Guy","family":"de Tr\u00e9","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.eswa.2015.12.040_bib0003","first-page":"126","article-title":"XPath-Wrapper Induction by generalizing tree traversal patterns","author":"Anton","year":"2005","journal-title":"Lernen, Wissensentdeckung und Adaptivitt (LWA)"},{"key":"10.1016\/j.eswa.2015.12.040_bib0004","series-title":"Proceedings of the 2003 ACM SIGMOD international conference on management of data SIGMOD \u201903","first-page":"337","article-title":"Extracting structured data from web pages","author":"Arasu","year":"2003"},{"year":"1999","series-title":"Modern information retrieval","author":"Baeza-Yates","key":"10.1016\/j.eswa.2015.12.040_bib0005"},{"key":"10.1016\/j.eswa.2015.12.040_bib0006","series-title":"Proceedings of 21st international conference on distributed computing systems","first-page":"361","article-title":"A fully automated object extraction system for the world wide web","author":"Buttler","year":"2001"},{"key":"10.1016\/j.eswa.2015.12.040_bib0007","series-title":"Proceedings of the 27th annual international conference on Research and development in information retrieval - SIGIR \u201904","first-page":"456","article-title":"Block-based web search","author":"Cai","year":"2004"},{"key":"10.1016\/j.eswa.2015.12.040_bib0008","series-title":"Proceedings of the 2008 european conference on machine learning and knowledge discovery in databases - part i ECML PKDD \u201908","first-page":"195","article-title":"Bootstrapping information extraction from semi-structured web pages","author":"Carlson","year":"2008"},{"issue":"10","key":"10.1016\/j.eswa.2015.12.040_bib0009","doi-asserted-by":"crossref","first-page":"1411","DOI":"10.1109\/TKDE.2006.152","article-title":"A survey of web information extraction systems","volume":"18","author":"Chang","year":"2006","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"10.1016\/j.eswa.2015.12.040_bib0010","series-title":"Proceedings of the 10th international conference on world wide web WWW \u201901","first-page":"681","article-title":"Iepad: Information extraction based on pattern discovery","author":"Chang","year":"2001"},{"key":"10.1016\/j.eswa.2015.12.040_bib0011","series-title":"WWW","first-page":"232","article-title":"A flexible learning system for wrapping tables and lists in HTML documents","author":"Cohen","year":"2002"},{"key":"10.1016\/j.eswa.2015.12.040_bib0012","series-title":"Proceedings of the 27th international conference on very large data bases VLDB \u201901","first-page":"109","article-title":"Roadrunner: towards automatic data extraction from large web sites","author":"Crescenzi","year":"2001"},{"key":"10.1016\/j.eswa.2015.12.040_bib0013","series-title":"Proceedings of the 2009 acm sigmod international conference on management of data SIGMOD \u201909","first-page":"335","article-title":"Robust web extraction: an approach based on a probabilistic tree-edit model","author":"Dalvi","year":"2009"},{"issue":"4","key":"10.1016\/j.eswa.2015.12.040_bib0014","doi-asserted-by":"crossref","first-page":"219","DOI":"10.14778\/1938545.1938547","article-title":"Automatic wrappers for large scale web extraction","volume":"4","author":"Dalvi","year":"2011","journal-title":"Proceedings of the VLDB Endowment"},{"key":"10.1016\/j.eswa.2015.12.040_bib0002","doi-asserted-by":"crossref","unstructured":"Document Object Model (DOM) https:\/\/linproxy.fan.workers.dev:443\/http\/www.w3.org\/DOM\/, (2005). Accessed 15.01.16.","DOI":"10.1484\/J.RM.2.303568"},{"key":"10.1016\/j.eswa.2015.12.040_bib0015","unstructured":"Eikvil, L., & Eikvil, L. (1999). Information extraction from world wide web - a survey. https:\/\/linproxy.fan.workers.dev:443\/http\/citeseerx.ist.psu.edu\/viewdoc\/summary?doi=10.1.1.41.4905"},{"key":"10.1016\/j.eswa.2015.12.040_bib0016","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1016\/j.knosys.2014.07.007","article-title":"Web data extraction, applications and techniques: A survey","volume":"70","author":"Ferrara","year":"2014","journal-title":"Knowledge-Based Systems"},{"key":"10.1016\/j.eswa.2015.12.040_bib0017","series-title":"Proceedings of the seventh international conference on Knowledge capture - K-CAP \u201913","first-page":"41","article-title":"Unsupervised wrapper induction using linked data","author":"Gentile","year":"2013"},{"year":"1997","series-title":"Algorithms on strings, trees, and sequences: computer science and computational biology","author":"Gusfield","key":"10.1016\/j.eswa.2015.12.040_bib0018"},{"key":"10.1016\/j.eswa.2015.12.040_bib0019","series-title":"Proceedings of the workshop on management of semistructured data","first-page":"1","article-title":"Extracting semistructured information from the web","author":"Hammer","year":"1997"},{"issue":"3","key":"10.1016\/j.eswa.2015.12.040_bib0020","doi-asserted-by":"crossref","first-page":"33","DOI":"10.1145\/603867.603873","article-title":"Wrapping web data into XML","volume":"30","author":"Han","year":"2001","journal-title":"SIGMOD Record"},{"key":"10.1016\/j.eswa.2015.12.040_bib0021","series-title":"Proceedings of the 34th international ACM SIGIR conference on research and development in information retrieval SIGIR \u201911","first-page":"775","article-title":"From one tree to a forest: a unified solution for structured web data extraction","author":"Hao","year":"2011"},{"issue":"8","key":"10.1016\/j.eswa.2015.12.040_bib0022","doi-asserted-by":"crossref","first-page":"521","DOI":"10.1016\/S0306-4379(98)00027-1","article-title":"Generating finite-state transducers for semi-structured data extraction from the web","volume":"23","author":"Hsu","year":"1998","journal-title":"Information Systems"},{"key":"10.1016\/j.eswa.2015.12.040_bib0023","series-title":"Technical Report","article-title":"Information Extraction A Survey","author":"Kaiser","year":"2005"},{"year":"1997","series-title":"Wrapper induction for information extraction","author":"Kushmerick","key":"10.1016\/j.eswa.2015.12.040_bib0024"},{"issue":"1-2","key":"10.1016\/j.eswa.2015.12.040_bib0025","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1016\/S0004-3702(99)00100-9","article-title":"Wrapper induction: efficiency and expressiveness","volume":"118","author":"Kushmerick","year":"2000","journal-title":"Artificial Intelligence"},{"key":"10.1016\/j.eswa.2015.12.040_bib0026","series-title":"Proceedings of the 15th international joint conference on artificial intelligence (IJCAI \u201997)","first-page":"729","article-title":"Wrapper induction for information extraction","author":"Kushmerick","year":"1997"},{"key":"10.1016\/j.eswa.2015.12.040_bib0027","series-title":"Proceedings of the 2004 ACM SIGMOD international conference on management of data SIGMOD \u201904","first-page":"119","article-title":"Using the structure of web sites for automatic segmentation of tables","author":"Lerman","year":"2004"},{"key":"10.1016\/j.eswa.2015.12.040_bib0028","doi-asserted-by":"crossref","first-page":"149","DOI":"10.1613\/jair.1145","article-title":"Wrapper maintenance: a machine learning approach","volume":"18","author":"Lerman","year":"2003","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"8","key":"10.1016\/j.eswa.2015.12.040_bib0029","first-page":"707","article-title":"Binary codes capable of correcting deletions, insertions, and reversals","volume":"10","author":"Levenshtein","year":"1966","journal-title":"Soviet Physics Doklady"},{"key":"10.1016\/j.eswa.2015.12.040_bib0030","series-title":"Proceedings of the ninth ACM SIGKDD international conference on knowledge discovery and data mining KDD \u201903","first-page":"601","article-title":"Mining data records in web pages","author":"Liu","year":"2003"},{"key":"10.1016\/j.eswa.2015.12.040_bib0031","series-title":"AAAI: Workshop on AI and information integration","first-page":"2","article-title":"Stalker: learning extraction rules for semistructured, web-based information sources","author":"Muslea","year":"1998"},{"key":"10.1016\/j.eswa.2015.12.040_bib0032","series-title":"Proceedings of the third annual conference on autonomous agents AGENTS \u201999","first-page":"190","article-title":"A hierarchical approach to wrapper induction","author":"Muslea","year":"1999"},{"key":"10.1016\/j.eswa.2015.12.040_bib0033","series-title":"Technical Report","article-title":"Robust web data extraction with XML path expressions","author":"Myllymaki","year":"2002"},{"issue":"1","key":"10.1016\/j.eswa.2015.12.040_bib0034","doi-asserted-by":"crossref","first-page":"31","DOI":"10.1145\/375360.375365","article-title":"A guided tour to approximate string matching","volume":"33","author":"Navarro","year":"2001","journal-title":"ACM Computing Surveys"},{"key":"10.1016\/j.eswa.2015.12.040_bib0035","series-title":"Proceedings of the international conference on knowledge discovery and information retrieval","first-page":"107:492","article-title":"Wrapper induction by xpath alignment","volume":"vol. 6","author":"Nielandt","year":"2014"},{"key":"10.1016\/j.eswa.2015.12.040_bib0036","series-title":"Proceedings of the 26th annual international ACM SIGIR conference on Research and development in informaion retrieval - SIGIR \u201903","first-page":"235","article-title":"Table extraction using conditional random fields","author":"Pinto","year":"2003"},{"key":"10.1016\/j.eswa.2015.12.040_bib0037","first-page":"738","article-title":"Building light-weight wrappers for legacy web data-sources using w4f","author":"Sahuguet","year":"1999","journal-title":"VLDB"},{"issue":"1-3","key":"10.1016\/j.eswa.2015.12.040_bib0038","doi-asserted-by":"crossref","first-page":"233","DOI":"10.1023\/A:1007562322031","article-title":"Learning information extraction rules for semi-structured and free text","volume":"34","author":"Soderland","year":"1999","journal-title":"Machine Learning"},{"key":"10.1016\/j.eswa.2015.12.040_bib0039","series-title":"Proceeding of Special interest tracks and posters of the 14th international conference on world wide web - WWW \u201905","first-page":"968","article-title":"Interactive web-wrapper construction for extracting relational information from web documents","author":"Sugibuchi","year":"2005"},{"year":"2011","series-title":"Siloseer : a visual content extraction system","author":"Varun","key":"10.1016\/j.eswa.2015.12.040_bib0040"},{"key":"10.1016\/j.eswa.2015.12.040_bib0041","series-title":"Proceedings of the twelfth international conference on World Wide Web - WWW \u201903","first-page":"187","article-title":"Data extraction and label assignment for web databases","author":"Wang","year":"2003"},{"issue":"4","key":"10.1016\/j.eswa.2015.12.040_bib0042","doi-asserted-by":"crossref","first-page":"523","DOI":"10.1109\/TKDE.2009.111","article-title":"Learning to adapt web information extraction knowledge and discovering new attributes via a Bayesian approach","volume":"22","author":"Wong","year":"2010","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"10.1016\/j.eswa.2015.12.040_bib0001","unstructured":"Xpath 1.0. W3C recommendation, (November1999). https:\/\/linproxy.fan.workers.dev:443\/http\/www.w3.org\/TR\/xpath\/ Accessed 15.01.16."},{"key":"10.1016\/j.eswa.2015.12.040_bib0043","series-title":"Proceedings of the 14th international conference on world wide web WWW \u201905","first-page":"76","article-title":"Web data extraction based on partial tree alignment","author":"Zhai","year":"2005"},{"key":"10.1016\/j.eswa.2015.12.040_bib0044","series-title":"Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining - KDD \u201907","first-page":"894","article-title":"Joint optimization of wrapper generation and template detection","author":"Zheng","year":"2007"},{"key":"10.1016\/j.eswa.2015.12.040_bib0045","series-title":"Proceedings of the 21st international conference companion on world wide web - www \u201912 companion","first-page":"93","article-title":"Data extraction from web pages based on structural-semantic entropy","author":"Zheng","year":"2012"},{"key":"10.1016\/j.eswa.2015.12.040_bib0046","series-title":"Proceedings of the 12th ACM SIGKDD international conference on knowledge discovery and data mining","first-page":"494","article-title":"Simultaneous record detection and attribute labeling in web data extraction","author":"Zhu","year":"2006"}],"container-title":["Expert Systems with Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/api.elsevier.com\/content\/article\/PII:S0957417415008489?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/api.elsevier.com\/content\/article\/PII:S0957417415008489?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2019,9,3]],"date-time":"2019-09-03T05:28:44Z","timestamp":1567488524000},"score":1,"resource":{"primary":{"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/linkinghub.elsevier.com\/retrieve\/pii\/S0957417415008489"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,6]]},"references-count":46,"alternative-id":["S0957417415008489"],"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1016\/j.eswa.2015.12.040","relation":{},"ISSN":["0957-4174"],"issn-type":[{"type":"print","value":"0957-4174"}],"subject":[],"published":{"date-parts":[[2016,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Predicate enrichment of aligned XPaths for wrapper induction","name":"articletitle","label":"Article Title"},{"value":"Expert Systems with Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1016\/j.eswa.2015.12.040","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"Copyright \u00a9 2016 Elsevier Ltd. All rights reserved.","name":"copyright","label":"Copyright"}]}}