Skip to content

Commit

Permalink
RedisScheduler重新实现
Browse files Browse the repository at this point in the history
  • Loading branch information
邹嵩 committed Jul 20, 2018
1 parent b8f531c commit 874bdbc
Show file tree
Hide file tree
Showing 6 changed files with 747 additions and 799 deletions.
14 changes: 7 additions & 7 deletions src/DotnetSpider.Common/Request.cs
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,13 @@ public override bool Equals(object obj)

Request request = (Request)obj;

if (!Depth.Equals(request.Depth)) return false;
if (!CycleTriedTimes.Equals(request.CycleTriedTimes)) return false;
if (!Referer.Equals(request.Referer)) return false;
if (!Origin.Equals(request.Origin)) return false;
if (!Method.Equals(request.Method)) return false;
if (!Priority.Equals(request.Priority)) return false;
if (!Content.Equals(request.Content)) return false;
if (!Equals(Depth, request.Depth)) return false;
if (!Equals(CycleTriedTimes, request.CycleTriedTimes)) return false;
if (!Equals(Referer, request.Referer)) return false;
if (!Equals(Origin, request.Origin)) return false;
if (!Equals(Method, request.Method)) return false;
if (!Equals(Priority, request.Priority)) return false;
if (!Equals(Content, request.Content)) return false;

if (Properties == null)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ public int Depth
/// 添加请求对象到队列
/// </summary>
/// <param name="request">请求对象</param>
public void Push(Request request, Func<Request, bool> shouldReserved)
public void Push(Request request, Func<Request, bool> shouldReserved = null)
{
var action = new Action(() =>
{
Expand Down
14 changes: 14 additions & 0 deletions src/DotnetSpider.Core/Scheduler/QueueDuplicateRemovedScheduler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -165,5 +165,19 @@ public override void Dispose()
}
base.Dispose();
}

/// <summary>
/// Gets all request objects currently in the queue, as the array
/// produced by <c>_queue.ToArray()</c>.
/// </summary>
/// <remarks>
/// The array is built while <c>_lock</c> is held; the lock is released
/// before the array is handed back to the caller.
/// </remarks>
internal Request[] All
{
    get
    {
        Request[] snapshot;

        // Only the ToArray() call needs to run under the lock.
        lock (_lock)
        {
            snapshot = _queue.ToArray();
        }

        return snapshot;
    }
}
}
}
7 changes: 5 additions & 2 deletions src/DotnetSpider.Core/Spider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -737,9 +737,12 @@ protected override void Execute(params string[] arguments)
var downloader = Downloader.Clone();
while (Status == Status.Running)
{
// 从队列中取出一个请求
// 从队列中取出一个请求。因为 Site 是共享对象, 每个 Request 都保留了对它的引用, 序列化后存到 Redis 或其它数据库中会浪费带宽和空间, 因此 Site 对象不保存到数据库中
Request request = Scheduler.Poll();

if (request != null && request.Site == null)
{
request.Site = Site;
}
// 如果队列中没有需要处理的请求, 则开始等待, 一直到设定的 EmptySleepTime 结束, 则认为爬虫应该结束了
if (request == null)
{
Expand Down
Loading

0 comments on commit 874bdbc

Please sign in to comment.