WEKO3
アイテム
{"_buckets": {"deposit": "ef31f9a9-13d1-40cb-bc8b-198593ff9633"}, "_deposit": {"created_by": 13, "id": "9340", "owners": [13], "pid": {"revision_id": 0, "type": "depid", "value": "9340"}, "status": "published"}, "_oai": {"id": "oai:uec.repo.nii.ac.jp:00009340", "sets": ["6"]}, "author_link": ["25434", "25436", "25435"], "item_10001_biblio_info_7": {"attribute_name": "書誌情報", "attribute_value_mlt": [{"bibliographicIssueDates": {"bibliographicIssueDate": "2019-08", "bibliographicIssueDateType": "Issued"}, "bibliographicIssueNumber": "8", "bibliographicPageEnd": "1625", "bibliographicPageStart": "1617", "bibliographicVolumeNumber": "E102.B", "bibliographic_titles": [{}, {"bibliographic_title": "IEICE Transactions on Communications", "bibliographic_titleLang": "en"}]}]}, "item_10001_description_5": {"attribute_name": "抄録", "attribute_value_mlt": [{"subitem_description": "Apache Hadoop and its ecosystem have become the de facto platform for processing large-scale data, or Big Data, because it hides the complexity of distributed computing, scheduling, and communication while providing fault-tolerance. Cloud-based environments are becoming a popular platform for hosting Hadoop clusters due to their low initial cost and limitless capacity. However, cloud-based Hadoop clusters bring their own challenges due to contradictory design principles. Hadoop is designed on the shared-nothing principle while cloud is based on the concepts of consolidation and resource sharing. Most of Hadoop\u0027s features are designed for on-premises data centers where the cluster topology is known. Hadoop depends on the rack assignment of servers (configured by the cluster administrator) to calculate the distance between servers. Hadoop calculates the distance between servers to find the best remote server from which to fetch data from when fetching non-local data. However, public cloud environment providers do not share rack information of virtual servers with their tenants. 
Lack of rack information of servers may allow Hadoop to fetch data from a remote server that is on the other side of the data center. To overcome this problem, we propose a delay distribution based scheme to find the closest server to fetch non-local data for public cloud-based Hadoop clusters. The proposed scheme bases server selection on the delay distributions between server pairs. Delay distribution is calculated measuring the round-trip time between servers periodically. Our experiments observe that the proposed scheme outperforms conventional Hadoop nearly by 12% in terms of non-local data fetch time. This reduction in data fetch time will lead to a reduction in job run time, especially in real-world multi-user clusters where non-local data fetching can happen frequently.", "subitem_description_type": "Abstract"}]}, "item_10001_publisher_8": {"attribute_name": "出版者", "attribute_value_mlt": [{"subitem_publisher": "IEICE "}]}, "item_10001_relation_14": {"attribute_name": "DOI", "attribute_value_mlt": [{"subitem_relation_type": "isVersionOf", "subitem_relation_type_id": {"subitem_relation_type_id_text": "10.1587/transcom.2018EBP3243", "subitem_relation_type_select": "DOI"}}]}, "item_10001_relation_17": {"attribute_name": "関連サイト", "attribute_value_mlt": [{"subitem_relation_type_id": {"subitem_relation_type_id_text": "http://search.ieice.org/index.html ", "subitem_relation_type_select": "URI"}}]}, "item_10001_rights_15": {"attribute_name": "権利", "attribute_value_mlt": [{"subitem_rights": "©2019 IEICE "}]}, "item_10001_source_id_9": {"attribute_name": "ISSN", "attribute_value_mlt": [{"subitem_source_identifier": "0916-8516", "subitem_source_identifier_type": "ISSN"}]}, "item_10001_version_type_20": {"attribute_name": "著者版フラグ", "attribute_value_mlt": [{"subitem_version_resource": "http://purl.org/coar/version/c_ab4af688f83e57aa", "subitem_version_type": "AM"}]}, "item_creator": {"attribute_name": "著者", "attribute_type": "creator", "attribute_value_mlt": 
[{"creatorNames": [{"creatorName": "RANAWEERA, Ravindra Sandaruwan", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "25434", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "OKI, Eiji", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "25435", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "KITSUWAN, Nattapong", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "25436", "nameIdentifierScheme": "WEKO"}]}]}, "item_files": {"attribute_name": "ファイル情報", "attribute_type": "file", "attribute_value_mlt": [{"accessrole": "open_date", "date": [{"dateType": "Available", "dateValue": "2019-09-19"}], "displaytype": "detail", "download_preview_message": "", "file_order": 0, "filename": "Delay Distribution Based Remote Data Fetch Scheme for Hadoop Clusters in Public Cloud.pdf", "filesize": [{"value": "1.5 MB"}], "format": "application/pdf", "future_date_message": "", "is_thumbnail": false, "licensetype": "license_free", "mimetype": "application/pdf", "size": 1500000.0, "url": {"label": "Delay Distribution Based Remote Data Fetch Scheme for Hadoop Clusters in Public Cloud", "url": "https://uec.repo.nii.ac.jp/record/9340/files/Delay Distribution Based Remote Data Fetch Scheme for Hadoop Clusters in Public Cloud.pdf"}, "version_id": "fdbb9ea7-ab4a-4ad3-8d44-4b5cc30ad184"}]}, "item_keyword": {"attribute_name": "キーワード", "attribute_value_mlt": [{"subitem_subject": "public cloud", "subitem_subject_language": "en", "subitem_subject_scheme": "Other"}, {"subitem_subject": "Hadoop", "subitem_subject_language": "en", "subitem_subject_scheme": "Other"}, {"subitem_subject": "big data", "subitem_subject_language": "en", "subitem_subject_scheme": "Other"}, {"subitem_subject": "HDFS", "subitem_subject_language": "en", "subitem_subject_scheme": "Other"}]}, "item_language": {"attribute_name": "言語", "attribute_value_mlt": [{"subitem_language": "eng"}]}, "item_resource_type": {"attribute_name": "資源タイプ", 
"attribute_value_mlt": [{"resourcetype": "journal article", "resourceuri": "http://purl.org/coar/resource_type/c_6501"}]}, "item_title": "Delay Distribution Based Remote Data Fetch Scheme for Hadoop Clusters in Public Cloud", "item_titles": {"attribute_name": "タイトル", "attribute_value_mlt": [{"subitem_title": "Delay Distribution Based Remote Data Fetch Scheme for Hadoop Clusters in Public Cloud", "subitem_title_language": "en"}]}, "item_type_id": "10001", "owner": "13", "path": ["6"], "permalink_uri": "https://uec.repo.nii.ac.jp/records/9340", "pubdate": {"attribute_name": "公開日", "attribute_value": "2019-09-19"}, "publish_date": "2019-09-19", "publish_status": "0", "recid": "9340", "relation": {}, "relation_version_is_last": true, "title": ["Delay Distribution Based Remote Data Fetch Scheme for Hadoop Clusters in Public Cloud"], "weko_shared_id": -1}
Delay Distribution Based Remote Data Fetch Scheme for Hadoop Clusters in Public Cloud
https://uec.repo.nii.ac.jp/records/9340
https://uec.repo.nii.ac.jp/records/9340 322f50bb-1cf0-4386-ad31-94dfe5e316b7
名前 / ファイル | ライセンス | アクション |
---|---|---|
Delay Distribution Based Remote Data Fetch Scheme for Hadoop Clusters in Public Cloud (1.5 MB) | | |
Item type | 学術雑誌論文 / Journal Article(1) | |||||
---|---|---|---|---|---|---|
公開日 | 2019-09-19 | |||||
タイトル | ||||||
言語 | en | |||||
タイトル | Delay Distribution Based Remote Data Fetch Scheme for Hadoop Clusters in Public Cloud | |||||
言語 | ||||||
言語 | eng | |||||
キーワード | ||||||
言語 | en | |||||
主題 | public cloud | |||||
キーワード | ||||||
言語 | en | |||||
主題 | Hadoop | |||||
キーワード | ||||||
言語 | en | |||||
主題 | big data | |||||
キーワード | ||||||
言語 | en | |||||
主題 | HDFS | |||||
資源タイプ | ||||||
資源タイプ識別子 | http://purl.org/coar/resource_type/c_6501 | |||||
資源タイプ | journal article | |||||
著者 | RANAWEERA, Ravindra Sandaruwan; OKI, Eiji; KITSUWAN, Nattapong | |||||
抄録 | ||||||
内容記述タイプ | Abstract | |||||
内容記述 | Apache Hadoop and its ecosystem have become the de facto platform for processing large-scale data, or Big Data, because it hides the complexity of distributed computing, scheduling, and communication while providing fault-tolerance. Cloud-based environments are becoming a popular platform for hosting Hadoop clusters due to their low initial cost and limitless capacity. However, cloud-based Hadoop clusters bring their own challenges due to contradictory design principles. Hadoop is designed on the shared-nothing principle while cloud is based on the concepts of consolidation and resource sharing. Most of Hadoop's features are designed for on-premises data centers where the cluster topology is known. Hadoop depends on the rack assignment of servers (configured by the cluster administrator) to calculate the distance between servers. Hadoop calculates the distance between servers to find the best remote server from which to fetch data from when fetching non-local data. However, public cloud environment providers do not share rack information of virtual servers with their tenants. Lack of rack information of servers may allow Hadoop to fetch data from a remote server that is on the other side of the data center. To overcome this problem, we propose a delay distribution based scheme to find the closest server to fetch non-local data for public cloud-based Hadoop clusters. The proposed scheme bases server selection on the delay distributions between server pairs. Delay distribution is calculated measuring the round-trip time between servers periodically. Our experiments observe that the proposed scheme outperforms conventional Hadoop nearly by 12% in terms of non-local data fetch time. This reduction in data fetch time will lead to a reduction in job run time, especially in real-world multi-user clusters where non-local data fetching can happen frequently. | |||||
書誌情報 | en : IEICE Transactions on Communications 巻 E102.B, 号 8, p. 1617-1625, 発行日 2019-08 | |||||
出版者 | ||||||
出版者 | IEICE | |||||
ISSN | ||||||
収録物識別子タイプ | ISSN | |||||
収録物識別子 | 0916-8516 | |||||
DOI | ||||||
関連タイプ | isVersionOf | |||||
識別子タイプ | DOI | |||||
関連識別子 | 10.1587/transcom.2018EBP3243 | |||||
権利 | ||||||
権利情報 | ©2019 IEICE | |||||
関連サイト | ||||||
識別子タイプ | URI | |||||
関連識別子 | http://search.ieice.org/index.html | |||||
著者版フラグ | ||||||
出版タイプ | AM | |||||
出版タイプResource | http://purl.org/coar/version/c_ab4af688f83e57aa |