{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,9,13]],"date-time":"2023-09-13T19:53:15Z","timestamp":1694634795444},"reference-count":17,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"3","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Inf. &amp; Syst."],"published-print":{"date-parts":[[2009]]},"DOI":"10.1587\/transinf.e92.d.506","type":"journal-article","created":{"date-parts":[[2009,3,19]],"date-time":"2009-03-19T06:02:36Z","timestamp":1237442556000},"page":"506-511","source":"Crossref","is-referenced-by-count":0,"title":["Training Set Selection for Building Compact and Efficient Language Models"],"prefix":"10.1587","volume":"E92-D","author":[{"given":"Keiji","family":"YASUDA","sequence":"first","affiliation":[{"name":"ATR Spoken Language Translation Research Laboratories"},{"name":"National Institute of Communications Technology"}]},{"given":"Hirofumi","family":"YAMAMOTO","sequence":"additional","affiliation":[{"name":"ATR Spoken Language Translation Research Laboratories"},{"name":"National Institute of Communications Technology"},{"name":"Department of Information, School of Science and Engineering, Kinki University"}]},{"given":"Eiichiro","family":"SUMITA","sequence":"additional","affiliation":[{"name":"ATR Spoken Language Translation Research Laboratories"},{"name":"National Institute of Communications Technology"}]}],"member":"532","reference":[{"key":"1","doi-asserted-by":"crossref","unstructured":"[1] D. Carter, &ldquo;Improving language models by clustering training sentences, &rdquo; Proc. Annual Meeting of the Association for Computational Linguistics (ACL), pp.59-64, 1994.","DOI":"10.3115\/974358.974372"},{"key":"2","doi-asserted-by":"crossref","unstructured":"[2] G. Kikui, E. Sumita, T. Takezawa, and S. Yamamoto, &ldquo;Creating corpora for speech-to-speech translation, &rdquo; Proc. EUROSPEECH, pp.381-384, 2003.","DOI":"10.21437\/Interspeech.2004-157"},{"key":"3","unstructured":"[3] NIST, &ldquo;The 2006 NIST machine translation evaluation plan (MT06), &rdquo; 2006. https:\/\/linproxy.fan.workers.dev:443\/http\/www.nist.gov\/speech\/tests\/mt\/doc\/mt06_evalplan.v3.pdf"},{"key":"4","unstructured":"[4] ELDA, &ldquo;TC-STAR: Technology and corpora for speech to speech translation, &rdquo; 2007. https:\/\/linproxy.fan.workers.dev:443\/http\/www.elda.org\/en\/proj\/tcstar-wp4\/tcs-run3.htm"},{"key":"5","unstructured":"[5] LDC, &ldquo;Linguistic data consortium, &rdquo; 2007. https:\/\/linproxy.fan.workers.dev:443\/http\/www.ldc.upenn.edu\/"},{"key":"6","doi-asserted-by":"crossref","unstructured":"[6] I.J. Good, &ldquo;The population frequencies of species and the estimation of population parameters, &rdquo; Biometrika, vol.40, no.3, pp.237-264, 1953.","DOI":"10.1093\/biomet\/40.3-4.237"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1162\/089120103321337421"},{"key":"8","doi-asserted-by":"crossref","unstructured":"[8] P. Koehn, F.J. Och, and D. Marcu, &ldquo;Statistical phrase-based translation, &rdquo; Proc. Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics (HLT-NAACL), pp.127-133, 2003.","DOI":"10.3115\/1073445.1073462"},{"key":"9","doi-asserted-by":"crossref","unstructured":"[9] F.J. Och, &ldquo;Minimum error rate training for statistical machine translation, &rdquo; Proc. 41st Annual Meeting of the Association for Computational Linguistics, pp.160-167, 2003.","DOI":"10.3115\/1075096.1075117"},{"key":"10","unstructured":"[10] S.F. Chen and J. Goodman, &ldquo;An empirical study of smoothing techniques for language modeling, &rdquo; Technical Report TR-10-98, Center for Research in Computing Technology (Harvard University), 1998."},{"key":"11","unstructured":"[11] X. Ma, &ldquo;Champollion: A robust parallel text sentence aligner, &rdquo; Proc. International Conference on Language Resources and Evaluation (LREC), pp.489-492, 2006."},{"key":"12","doi-asserted-by":"crossref","unstructured":"[12] R. Zhang, G. Kikui, and E. Sumita, &ldquo;Subword-based tagging by conditional random fields for Chinese word segmentation, &rdquo; Proc. North American Chapter of the Association for Computational Linguistics (NAACL), vol.Short Paper, pp.193-196, 2006.","DOI":"10.3115\/1614049.1614098"},{"key":"13","doi-asserted-by":"crossref","unstructured":"[13] K. Papineni, S. Roukos, T. Ward, and W.J. Zhu, &ldquo;Bleu: A method for automatic evaluation of machine translation, &rdquo; Proc. 40th Annual Meeting of the Association for Computational Linguistics (ACL), pp.311-318, 2002.","DOI":"10.3115\/1073083.1073135"},{"key":"14","unstructured":"[14] NIST, &ldquo;Automatic evaluation of machine translation quality using N-gram co-occurence statistics, &rdquo; 2002. https:\/\/linproxy.fan.workers.dev:443\/http\/www.nist.gov\/speech\/tests\/mt\/mt2001\/resource\/"},{"key":"15","unstructured":"[15] Y. Zhang and S. Vogel, &ldquo;Measuring confidence intervals for the machine translation evaluation metrics, &rdquo; Proc. 10th International Conference on Theoretical and Methodological Issues in Machine Translation, 2004."},{"key":"16","unstructured":"[16] D. Hakkani-Tur and M. Rahim, &ldquo;Bootstrapping language models for spoken dialog systems from the World Wide Web, &rdquo; Proc. IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), vol.1, pp.1065-1068, 2006."},{"key":"17","doi-asserted-by":"crossref","unstructured":"[17] A. Sethy, S. Narayanan, and B. Ramabhadran, &ldquo;Data driven approach for language model adaptation using stepwise relative entropy minimization, &rdquo; Proc. IEEE Int. Conf. Acoust. Speech Signal Process. (ICASSP), vol.4, pp.177-180, 2007.","DOI":"10.1109\/ICASSP.2007.367192"}],"container-title":["IEICE Transactions on Information and Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/linproxy.fan.workers.dev:443\/http\/www.jstage.jst.go.jp\/article\/transinf\/E92.D\/3\/E92.D_3_506\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,3]],"date-time":"2021-10-03T11:05:22Z","timestamp":1633259122000},"score":1,"resource":{"primary":{"URL":"https:\/\/linproxy.fan.workers.dev:443\/http\/www.jstage.jst.go.jp\/article\/transinf\/E92.D\/3\/E92.D_3_506\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009]]},"references-count":17,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2009]]}},"URL":"https:\/\/linproxy.fan.workers.dev:443\/https\/doi.org\/10.1587\/transinf.e92.d.506","relation":{},"ISSN":["0916-8532","1745-1361"],"issn-type":[{"value":"0916-8532","type":"print"},{"value":"1745-1361","type":"electronic"}],"subject":[],"published":{"date-parts":[[2009]]}}}