您好,登錄后才能下訂單哦!
Spark 2.1.0。
去年在網易之初,已經開發了一個完整的RPC框架,其中使用的核心技術也是Netty,所以當看到Spark的RPC框架時,并不覺得太陌生,關于個人開發的這個RPC框架,真正完全可用是在今年,明年會完善一下,開源出來,因為個人覺得弄得一個簡單RPC框架的技術原理,對于大數據、分布式計算相關的知識,真的是幫助太大。
本篇說一下TransportContext、TransportConf、ConfigProvider、SparkTransportConf,也是僅僅作為個人的閱讀記錄。
TransportContext是創建RPC server和client的關鍵類,其中需要使用到的配置信息保存在TransportConf對象,TransportConf對象用于存儲核心配置信息的對象為ConfigProvider,在實際使用中,一般使用SparkTransportConf來創建TransportConf對象,可以說,SparkTransportConf通過ConfigProvider對象將SparkConf和TransportConf連接了起來,所以實際上,在TransportConf對象中,是可以讀取到SparkConf的配置信息的。
依然是在關鍵地方加了個人的注釋,有些地方英文注釋本身已經說得很明白了,就不加注釋了。
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.network.util;
import com.google.common.primitives.Ints;
/**
* A central location that tracks all the settings we expose to users.
*/
public class TransportConf {
static {
// Set this due to Netty PR #5661 for Netty 4.0.37+ to work
System.setProperty("io.netty.maxDirectMemory", "0");
}
private final String SPARK_NETWORK_IO_MODE_KEY;
private final String SPARK_NETWORK_IO_PREFERDIRECTBUFS_KEY;
private final String SPARK_NETWORK_IO_CONNECTIONTIMEOUT_KEY;
private final String SPARK_NETWORK_IO_BACKLOG_KEY;
private final String SPARK_NETWORK_IO_NUMCONNECTIONSPERPEER_KEY;
private final String SPARK_NETWORK_IO_SERVERTHREADS_KEY;
private final String SPARK_NETWORK_IO_CLIENTTHREADS_KEY;
private final String SPARK_NETWORK_IO_RECEIVEBUFFER_KEY;
private final String SPARK_NETWORK_IO_SENDBUFFER_KEY;
private final String SPARK_NETWORK_SASL_TIMEOUT_KEY;
private final String SPARK_NETWORK_IO_MAXRETRIES_KEY;
private final String SPARK_NETWORK_IO_RETRYWAIT_KEY;
private final String SPARK_NETWORK_IO_LAZYFD_KEY;
private final ConfigProvider conf; // 配置提供者
private final String module; // 配置的模塊名稱
public TransportConf(String module, ConfigProvider conf) {
this.module = module;
this.conf = conf;
SPARK_NETWORK_IO_MODE_KEY = getConfKey("io.mode");
SPARK_NETWORK_IO_PREFERDIRECTBUFS_KEY = getConfKey("io.preferDirectBufs");
SPARK_NETWORK_IO_CONNECTIONTIMEOUT_KEY = getConfKey("io.connectionTimeout");
SPARK_NETWORK_IO_BACKLOG_KEY = getConfKey("io.backLog");
SPARK_NETWORK_IO_NUMCONNECTIONSPERPEER_KEY = getConfKey("io.numConnectionsPerPeer");
SPARK_NETWORK_IO_SERVERTHREADS_KEY = getConfKey("io.serverThreads");
SPARK_NETWORK_IO_CLIENTTHREADS_KEY = getConfKey("io.clientThreads");
SPARK_NETWORK_IO_RECEIVEBUFFER_KEY = getConfKey("io.receiveBuffer");
SPARK_NETWORK_IO_SENDBUFFER_KEY = getConfKey("io.sendBuffer");
SPARK_NETWORK_SASL_TIMEOUT_KEY = getConfKey("sasl.timeout");
SPARK_NETWORK_IO_MAXRETRIES_KEY = getConfKey("io.maxRetries");
SPARK_NETWORK_IO_RETRYWAIT_KEY = getConfKey("io.retryWait");
SPARK_NETWORK_IO_LAZYFD_KEY = getConfKey("io.lazyFD");
}
public int getInt(String name, int defaultValue) {
return conf.getInt(name, defaultValue);
}
private String getConfKey(String suffix) {
return "spark." + module + "." + suffix;
}
/** IO mode: nio or epoll */
public String ioMode() { return conf.get(SPARK_NETWORK_IO_MODE_KEY, "NIO").toUpperCase(); }
/** If true, we will prefer allocating off-heap byte buffers within Netty. */
public boolean preferDirectBufs() {
return conf.getBoolean(SPARK_NETWORK_IO_PREFERDIRECTBUFS_KEY, true);
}
/** Connect timeout in milliseconds. Default 120 secs. */
public int connectionTimeoutMs() {
long defaultNetworkTimeoutS = JavaUtils.timeStringAsSec(
conf.get("spark.network.timeout", "120s"));
long defaultTimeoutMs = JavaUtils.timeStringAsSec(
conf.get(SPARK_NETWORK_IO_CONNECTIONTIMEOUT_KEY, defaultNetworkTimeoutS + "s")) * 1000;
return (int) defaultTimeoutMs;
}
/** Number of concurrent connections between two nodes for fetching data. */
public int numConnectionsPerPeer() {
return conf.getInt(SPARK_NETWORK_IO_NUMCONNECTIONSPERPEER_KEY, 1);
}
/** Requested maximum length of the queue of incoming connections. Default -1 for no backlog. */
public int backLog() { return conf.getInt(SPARK_NETWORK_IO_BACKLOG_KEY, -1); }
/** Number of threads used in the server thread pool. Default to 0, which is 2x#cores. */
public int serverThreads() { return conf.getInt(SPARK_NETWORK_IO_SERVERTHREADS_KEY, 0); }
/** Number of threads used in the client thread pool. Default to 0, which is 2x#cores. */
public int clientThreads() { return conf.getInt(SPARK_NETWORK_IO_CLIENTTHREADS_KEY, 0); }
/**
* Receive buffer size (SO_RCVBUF).
* Note: the optimal size for receive buffer and send buffer should be
* latency * network_bandwidth.
* Assuming latency = 1ms, network_bandwidth = 10Gbps
* buffer size should be ~ 1.25MB
*/
public int receiveBuf() { return conf.getInt(SPARK_NETWORK_IO_RECEIVEBUFFER_KEY, -1); }
/** Send buffer size (SO_SNDBUF). */
public int sendBuf() { return conf.getInt(SPARK_NETWORK_IO_SENDBUFFER_KEY, -1); }
/** Timeout for a single round trip of SASL token exchange, in milliseconds. */
public int saslRTTimeoutMs() {
return (int) JavaUtils.timeStringAsSec(conf.get(SPARK_NETWORK_SASL_TIMEOUT_KEY, "30s")) * 1000;
}
/**
* Max number of times we will try IO exceptions (such as connection timeouts) per request.
* If set to 0, we will not do any retries.
*/
public int maxIORetries() { return conf.getInt(SPARK_NETWORK_IO_MAXRETRIES_KEY, 3); }
/**
* Time (in milliseconds) that we will wait in order to perform a retry after an IOException.
* Only relevant if maxIORetries > 0.
*/
public int ioRetryWaitTimeMs() {
return (int) JavaUtils.timeStringAsSec(conf.get(SPARK_NETWORK_IO_RETRYWAIT_KEY, "5s")) * 1000;
}
/**
* Minimum size of a block that we should start using memory map rather than reading in through
* normal IO operations. This prevents Spark from memory mapping very small blocks. In general,
* memory mapping has high overhead for blocks close to or below the page size of the OS.
*/
public int memoryMapBytes() {
return Ints.checkedCast(JavaUtils.byteStringAsBytes(
conf.get("spark.storage.memoryMapThreshold", "2m")));
}
/**
* Whether to initialize FileDescriptor lazily or not. If true, file descriptors are
* created only when data is going to be transferred. This can reduce the number of open files.
*/
public boolean lazyFileDescriptor() {
return conf.getBoolean(SPARK_NETWORK_IO_LAZYFD_KEY, true);
}
/**
* Maximum number of retries when binding to a port before giving up.
*/
public int portMaxRetries() {
return conf.getInt("spark.port.maxRetries", 16);
}
/**
* Maximum number of bytes to be encrypted at a time when SASL encryption is enabled.
*/
public int maxSaslEncryptedBlockSize() {
return Ints.checkedCast(JavaUtils.byteStringAsBytes(
conf.get("spark.network.sasl.maxEncryptedBlockSize", "64k")));
}
/**
* Whether the server should enforce encryption on SASL-authenticated connections.
*/
public boolean saslServerAlwaysEncrypt() {
return conf.getBoolean("spark.network.sasl.serverAlwaysEncrypt", false);
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.network.util;
import java.util.NoSuchElementException;
/**
* Provides a mechanism for constructing a {@link TransportConf} using some sort of configuration.
* Leaf Note:
* 提供一種使用某些配置的方式去構造一個TransportConf對象,其實什么意思呢?
* 看到其提供了抽象get方法,該類中所有非抽象方法最終都是調用該抽象方法的,所以顯然在構造ConfigProvider對象時,
* 就可以重載get(String name)方法,如何重載?它的返回值使用SparkConf的get()方法fuc獲取SparkConf對象的配置
* 信息就可以了,查看SparkTransportConf,正是這樣來使用ConfigProvider的
*/
public abstract class ConfigProvider {
/** Obtains the value of the given config, throws NoSuchElementException if it doesn't exist. */
public abstract String get(String name);
public String get(String name, String defaultValue) {
try {
return get(name);
} catch (NoSuchElementException e) {
return defaultValue;
}
}
public int getInt(String name, int defaultValue) {
return Integer.parseInt(get(name, Integer.toString(defaultValue)));
}
public long getLong(String name, long defaultValue) {
return Long.parseLong(get(name, Long.toString(defaultValue)));
}
public double getDouble(String name, double defaultValue) {
return Double.parseDouble(get(name, Double.toString(defaultValue)));
}
public boolean getBoolean(String name, boolean defaultValue) {
return Boolean.parseBoolean(get(name, Boolean.toString(defaultValue)));
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.network.netty
import org.apache.spark.SparkConf
import org.apache.spark.network.util.{ConfigProvider, TransportConf}
/**
* Provides a utility for transforming from a SparkConf inside a Spark JVM (e.g., Executor,
* Driver, or a standalone shuffle service) into a TransportConf with details on our environment
* like the number of cores that are allocated to this JVM.
* Leaf Note:
* 一般創建TransportConf是通過SparkTransportConf來進行創建的,
* SparkTransportConf一個很重要的作用是,將SparkConf與TransportConf連接起來,那怎么做到的?
* 那就是使用SparkConf的get方法去代理實現ConfigProvider的抽象get方法,而恰恰TransportConf
* 中有一個ConfigProvider的屬性
*/
object SparkTransportConf {
/**
* Specifies an upper bound on the number of Netty threads that Spark requires by default.
* In practice, only 2-4 cores should be required to transfer roughly 10 Gb/s, and each core
* that we use will have an initial overhead of roughly 32 MB of off-heap memory, which comes
* at a premium.
*
* Thus, this value should still retain maximum throughput and reduce wasted off-heap memory
* allocation. It can be overridden by setting the number of serverThreads and clientThreads
* manually in Spark's configuration.
*/
private val MAX_DEFAULT_NETTY_THREADS = 8
/**
* Utility for creating a [[TransportConf]] from a [[SparkConf]].
* @param _conf the [[SparkConf]]
* @param module the module name
* @param numUsableCores if nonzero, this will restrict the server and client threads to only
* use the given number of cores, rather than all of the machine's cores.
* This restriction will only occur if these properties are not already set.
*/
def fromSparkConf(_conf: SparkConf, module: String, numUsableCores: Int = 0): TransportConf = {
val conf = _conf.clone
// Specify thread configuration based on our JVM's allocation of cores (rather than necessarily
// assuming we have all the machine's cores).
// NB: Only set if serverThreads/clientThreads not already set.
val numThreads = defaultNumThreads(numUsableCores)
conf.setIfMissing(s"spark.$module.io.serverThreads", numThreads.toString)
conf.setIfMissing(s"spark.$module.io.clientThreads", numThreads.toString)
new TransportConf(module, new ConfigProvider {
override def get(name: String): String = conf.get(name)
})
}
/**
* Returns the default number of threads for both the Netty client and server thread pools.
* If numUsableCores is 0, we will use Runtime get an approximate number of available cores.
*/
private def defaultNumThreads(numUsableCores: Int): Int = {
val availableCores =
if (numUsableCores > 0) numUsableCores else Runtime.getRuntime.availableProcessors()
math.min(availableCores, MAX_DEFAULT_NETTY_THREADS)
}
}
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。