{"created":"2025-02-28T04:02:08.259288+00:00","id":2012765,"links":{},"metadata":{"_buckets":{"deposit":"9abab400-d19e-4cae-a17b-ebeedf2df2fd"},"_deposit":{"created_by":122,"id":"2012765","owner":"122","owners":[122],"pid":{"revision_id":0,"type":"depid","value":"2012765"},"status":"published"},"_oai":{"id":"oai:tokushima-u.repo.nii.ac.jp:02012765","sets":["1713853213384:1713853295607"]},"author_link":["942","728"],"control_number":"2012765","item_10001_alternative_title_1":{"attribute_name":"タイトル別表記","attribute_value_mlt":[{"subitem_alternative_title":"Detection of Arbitrary Wake Words","subitem_alternative_title_language":"en"}]},"item_10001_biblio_info_7":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2024-08-22","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"1","bibliographicPageStart":"e14","bibliographicVolumeNumber":"13","bibliographic_titles":[{"bibliographic_title":"APSIPA Transactions on Signal and Information Processing","bibliographic_titleLang":"en"}]}]},"item_10001_description_5":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"Most wake word (WW) detection systems used in smartphones and smart speakers only detect specific, predefined WWs such as “Hey, Siri” or “OK, Google”. To build such a system, a large speech corpus consisting of many examples of the selected WWs must be collected to train the model. If we want the device to detect a different WW, collection of a new speech corpus and re-training of the model are required.\nIn this study, we propose a system which is capable of detecting any chosen WW without additional model training or a corpus of WW utterances, allowing users to select and use their preferred WW. Our system consists of a phoneme predictor (PP) and a phoneme sequence detector (PSD). The PP predicts phoneme sequences using acoustic features of the input speech, and outputs phoneme probability distributions. The acoustic models in the PP are trained using the Connectionist Temporal Classification (CTC) loss criterion. The PSD takes the output of the PP as input, and predicts the probability of whether or not the WW has been input. In our evaluation experiments, we performed six-phoneme WW detection. Our results showed that the proposed method achieved 90% WW detection accuracy.","subitem_description_language":"en","subitem_description_type":"Abstract"}]},"item_10001_publisher_8":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"Cambridge University Press","subitem_publisher_language":"en"}]},"item_10001_rights_15":{"attribute_name":"権利情報","attribute_value_mlt":[{"subitem_rights":"This is an Open Access article, distributed under the terms of the Creative Commons Attribution licence (http://creativecommons.org/licenses/by-nc/4.0/), which permits unrestricted re-use, distribution, and reproduction in any medium, for non-commercial use, provided the original work is properly cited.","subitem_rights_language":"en"}]},"item_10001_source_id_9":{"attribute_name":"収録物ID","attribute_value_mlt":[{"subitem_source_identifier":"20487703","subitem_source_identifier_type":"EISSN"}]},"item_10001_version_type_20":{"attribute_name":"出版タイプ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_1715043197608":{"attribute_name":"アクセス権","attribute_value_mlt":[{"subitem_access_right":"open access","subitem_access_right_uri":"http://purl.org/coar/access_right/c_abf2"}]},"item_1722929371688":{"attribute_name":"出版社版DOI","attribute_value_mlt":[{"subitem_relation_name":[{"subitem_relation_name_language":"ja","subitem_relation_name_text":"10.1561/116.20240014"}],"subitem_relation_type_id":{"subitem_relation_type_id_text":"http://dx.doi.org/10.1561/116.20240014","subitem_relation_type_select":"DOI"}}]},"item_1723180141928":{"attribute_name":"EID","attribute_value_mlt":[{"subitem_identifier_type":"URI","subitem_identifier_uri":"412237"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"西村, 良太","creatorNameLang":"ja"},{"creatorName":"ニシムラ, リョウタ","creatorNameLang":"ja-Kana"},{"creatorName":"Nishimura, Ryota","creatorNameLang":"en"}],"familyNames":[{"familyName":"西村","familyNameLang":"ja"},{"familyName":"ニシムラ","familyNameLang":"ja-Kana"},{"familyName":"Nishimura","familyNameLang":"en"}],"givenNames":[{"givenName":"良太","givenNameLang":"ja"},{"givenName":"リョウタ","givenNameLang":"ja-Kana"},{"givenName":"Ryota","givenNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"942","nameIdentifierScheme":"WEKO"},{"nameIdentifier":"346405/profile-ja.html","nameIdentifierScheme":"徳島大学 教育研究者総覧","nameIdentifierURI":"http://pub2.db.tokushima-u.ac.jp/ERD/person/346405/profile-ja.html"}]},{"creatorNames":[{"creatorName":"Uno, Takaaki","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Yamamoto, Taiki","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Ohta, Kengo","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"北岡, 教英","creatorNameLang":"ja"},{"creatorName":"キタオカ, ノリヒデ","creatorNameLang":"ja-Kana"},{"creatorName":"Kitaoka, Norihide","creatorNameLang":"en"}],"familyNames":[{"familyName":"北岡","familyNameLang":"ja"},{"familyName":"キタオカ","familyNameLang":"ja-Kana"},{"familyName":"Kitaoka","familyNameLang":"en"}],"givenNames":[{"givenName":"教英","givenNameLang":"ja"},{"givenName":"ノリヒデ","givenNameLang":"ja-Kana"},{"givenName":"Norihide","givenNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"728","nameIdentifierScheme":"WEKO"},{"nameIdentifier":"10333501","nameIdentifierScheme":"e-Rad_Researcher","nameIdentifierURI":"https://nrid.nii.ac.jp/ja/search/?qm=10333501"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_access","date":[{"dateType":"Available","dateValue":"2025-03-18"}],"displaytype":"detail","filename":"atsip_13_1_e14.pdf","filesize":[{"value":"2 MB"}],"format":"application/pdf","licensetype":"license_3","mimetype":"application/pdf","url":{"objectType":"fulltext","url":"https://tokushima-u.repo.nii.ac.jp/record/2012765/files/atsip_13_1_e14.pdf"},"version_id":"6d145984-f9a2-41ac-85e3-78b0c7a1e2f0"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"Wake word","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"CTC","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"end-to-end modeling","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"phoneme sequence detector","subitem_subject_language":"en","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"journal article","resourceuri":"http://purl.org/coar/resource_type/c_6501"}]},"item_title":"Detection of Arbitrary Wake Words by Coupling a Phoneme Predictor and a Phoneme Sequence Detector","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Detection of Arbitrary Wake Words by Coupling a Phoneme Predictor and a Phoneme Sequence Detector","subitem_title_language":"en"}]},"item_type_id":"40001","owner":"122","path":["1713853295607"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2025-03-18"},"publish_date":"2025-03-18","publish_status":"0","recid":"2012765","relation_version_is_last":true,"title":["Detection of Arbitrary Wake Words by Coupling a Phoneme Predictor and a Phoneme Sequence Detector"],"weko_creator_id":"122","weko_shared_id":-1},"updated":"2025-03-18T06:11:46.580708+00:00"}