blob: af1af2a95b6acb0e29a88e2d264e8ee2c80f9cac [file] [log] [blame]
//
// Copyright (C) 2013 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "shill/traffic_monitor.h"
#include <base/bind.h>
#include <base/strings/stringprintf.h>
#include <netinet/in.h>
#include "shill/device.h"
#include "shill/device_info.h"
#include "shill/event_dispatcher.h"
#include "shill/logging.h"
#include "shill/socket_info_reader.h"
using base::StringPrintf;
using std::string;
using std::vector;
namespace shill {
namespace Logging {
static auto kModuleLogScope = ScopeLogger::kLink;
static string ObjectID(Device* d) { return d->link_name(); }
}
// static
const uint16_t TrafficMonitor::kDnsPort = 53;
const int64_t TrafficMonitor::kDnsTimedOutThresholdSeconds = 15;
const int TrafficMonitor::kMinimumFailedSamplesToTrigger = 2;
const int64_t TrafficMonitor::kSamplingIntervalMilliseconds = 5000;
TrafficMonitor::TrafficMonitor(const DeviceRefPtr& device,
EventDispatcher* dispatcher)
: device_(device),
dispatcher_(dispatcher),
socket_info_reader_(new SocketInfoReader),
accummulated_congested_tx_queues_samples_(0),
connection_info_reader_(new ConnectionInfoReader),
accummulated_dns_failures_samples_(0) {
}
TrafficMonitor::~TrafficMonitor() {
Stop();
}
void TrafficMonitor::Start() {
SLOG(device_.get(), 2) << __func__;
Stop();
sample_traffic_callback_.Reset(base::Bind(&TrafficMonitor::SampleTraffic,
base::Unretained(this)));
dispatcher_->PostDelayedTask(FROM_HERE, sample_traffic_callback_.callback(),
kSamplingIntervalMilliseconds);
}
void TrafficMonitor::Stop() {
SLOG(device_.get(), 2) << __func__;
sample_traffic_callback_.Cancel();
ResetCongestedTxQueuesStats();
ResetDnsFailingStats();
}
void TrafficMonitor::ResetCongestedTxQueuesStats() {
accummulated_congested_tx_queues_samples_ = 0;
}
void TrafficMonitor::ResetCongestedTxQueuesStatsWithLogging() {
SLOG(device_.get(), 2) << __func__ << ": Tx-queues decongested";
ResetCongestedTxQueuesStats();
}
void TrafficMonitor::BuildIPPortToTxQueueLength(
const vector<SocketInfo>& socket_infos,
IPPortToTxQueueLengthMap* tx_queue_lengths) {
SLOG(device_.get(), 3) << __func__;
string device_ip_address = device_->ipconfig()->properties().address;
for (const auto& info : socket_infos) {
SLOG(device_.get(), 4) << "SocketInfo(IP="
<< info.local_ip_address().ToString()
<< ", TX=" << info.transmit_queue_value()
<< ", State=" << info.connection_state()
<< ", TimerState=" << info.timer_state();
if (info.local_ip_address().ToString() != device_ip_address ||
info.transmit_queue_value() == 0 ||
info.connection_state() != SocketInfo::kConnectionStateEstablished ||
(info.timer_state() != SocketInfo::kTimerStateRetransmitTimerPending &&
info.timer_state() !=
SocketInfo::kTimerStateZeroWindowProbeTimerPending)) {
SLOG(device_.get(), 4) << "Connection Filtered.";
continue;
}
SLOG(device_.get(), 3) << "Monitoring connection: TX="
<< info.transmit_queue_value()
<< " TimerState=" << info.timer_state();
string local_ip_port =
StringPrintf("%s:%d",
info.local_ip_address().ToString().c_str(),
info.local_port());
(*tx_queue_lengths)[local_ip_port] = info.transmit_queue_value();
}
}
bool TrafficMonitor::IsCongestedTxQueues() {
SLOG(device_.get(), 4) << __func__;
vector<SocketInfo> socket_infos;
if (!socket_info_reader_->LoadTcpSocketInfo(&socket_infos) ||
socket_infos.empty()) {
SLOG(device_.get(), 3) << __func__ << ": Empty socket info";
ResetCongestedTxQueuesStatsWithLogging();
return false;
}
bool congested_tx_queues = true;
IPPortToTxQueueLengthMap curr_tx_queue_lengths;
BuildIPPortToTxQueueLength(socket_infos, &curr_tx_queue_lengths);
if (curr_tx_queue_lengths.empty()) {
SLOG(device_.get(), 3) << __func__ << ": No interesting socket info";
ResetCongestedTxQueuesStatsWithLogging();
} else {
for (const auto& length_entry : old_tx_queue_lengths_) {
IPPortToTxQueueLengthMap::iterator curr_tx_queue_it =
curr_tx_queue_lengths.find(length_entry.first);
if (curr_tx_queue_it == curr_tx_queue_lengths.end() ||
curr_tx_queue_it->second < length_entry.second) {
congested_tx_queues = false;
// TODO(armansito): If we had a false positive earlier, we may
// want to correct it here by invoking a "connection back to normal
// callback", so that the OutOfCredits property can be set to
// false.
break;
}
}
if (congested_tx_queues) {
++accummulated_congested_tx_queues_samples_;
SLOG(device_.get(), 2) << __func__
<< ": Congested tx-queues detected ("
<< accummulated_congested_tx_queues_samples_
<< ")";
}
}
old_tx_queue_lengths_ = curr_tx_queue_lengths;
return congested_tx_queues;
}
void TrafficMonitor::ResetDnsFailingStats() {
accummulated_dns_failures_samples_ = 0;
}
void TrafficMonitor::ResetDnsFailingStatsWithLogging() {
SLOG(device_.get(), 2) << __func__ << ": DNS queries restored";
ResetDnsFailingStats();
}
bool TrafficMonitor::IsDnsFailing() {
SLOG(device_.get(), 4) << __func__;
vector<ConnectionInfo> connection_infos;
if (!connection_info_reader_->LoadConnectionInfo(&connection_infos) ||
connection_infos.empty()) {
SLOG(device_.get(), 3) << __func__ << ": Empty connection info";
} else {
// The time-to-expire counter is used to determine when a DNS request
// has timed out. This counter is the number of seconds remaining until
// the entry is removed from the system IP connection tracker. The
// default time is 30 seconds. This is too long of a wait. Instead, we
// want to time out at |kDnsTimedOutThresholdSeconds|. Unfortunately,
// we cannot simply look for entries less than
// |kDnsTimedOutThresholdSeconds| because we will count the entry
// multiple times once its time-to-expire is less than
// |kDnsTimedOutThresholdSeconds|. To ensure that we only count an
// entry once, we look for entries in this time window between
// |kDnsTimedOutThresholdSeconds| and |kDnsTimedOutLowerThresholdSeconds|.
const int64_t kDnsTimedOutLowerThresholdSeconds =
kDnsTimedOutThresholdSeconds - kSamplingIntervalMilliseconds / 1000;
string device_ip_address = device_->ipconfig()->properties().address;
for (const auto& info : connection_infos) {
if (info.protocol() != IPPROTO_UDP ||
info.time_to_expire_seconds() > kDnsTimedOutThresholdSeconds ||
info.time_to_expire_seconds() <= kDnsTimedOutLowerThresholdSeconds ||
!info.is_unreplied() ||
info.original_source_ip_address().ToString() != device_ip_address ||
info.original_destination_port() != kDnsPort)
continue;
++accummulated_dns_failures_samples_;
SLOG(device_.get(), 2) << __func__
<< ": DNS failures detected ("
<< accummulated_dns_failures_samples_ << ")";
return true;
}
}
ResetDnsFailingStatsWithLogging();
return false;
}
void TrafficMonitor::SampleTraffic() {
SLOG(device_.get(), 3) << __func__;
// Schedule the sample callback first, so it is possible for the network
// problem callback to stop the traffic monitor.
dispatcher_->PostDelayedTask(FROM_HERE, sample_traffic_callback_.callback(),
kSamplingIntervalMilliseconds);
if (IsCongestedTxQueues() &&
accummulated_congested_tx_queues_samples_ ==
kMinimumFailedSamplesToTrigger) {
LOG(WARNING) << "Congested tx queues detected, out-of-credits?";
network_problem_detected_callback_.Run(kNetworkProblemCongestedTxQueue);
} else if (IsDnsFailing() &&
accummulated_dns_failures_samples_ ==
kMinimumFailedSamplesToTrigger) {
LOG(WARNING) << "DNS queries failing, out-of-credits?";
network_problem_detected_callback_.Run(kNetworkProblemDNSFailure);
}
}
} // namespace shill