Wednesday, September 29, 2010

.NET 4.0 Parallel IO Test

The new .NET 4.0 TPL simplifies parallel programming. Does it speed up the I/O when reading and writing a large volume of files? The answer is yes. In a quick test on my desktop (Intel i5 760 CPU, 8 GB memory, Windows Server 2008 64-bit), I found that writing 10000 small files in parallel is about 4 times faster than doing so without the TPL. Loading all those 10000 files in parallel is about 6 times faster than without the TPL. The test code is below:
using System;
using System.Collections.Generic;
using System.Text;
using System.Xml.Linq;
using System.IO;
using System.Diagnostics;
using System.Threading.Tasks;

class Program
{
    // Folder that holds the generated test files. NOTE(review): hard-coded
    // Windows path — run with permission to create/delete c:\Test\.
    static string path = @"c:\Test\";

    // Shared 1 KB random payload written into every file.
    static string fakeValue = GetFakeValue(1024);

    // Number of files written/read in each benchmark pass.
    static int fileNumber = 10000;

    // Guards the result list in ReadInParallel; List<T> is not thread-safe.
    static readonly object filesLock = new object();

    /// <summary>
    /// Runs the four benchmark passes (serial save/read, parallel save/read)
    /// and prints the elapsed time of each to the console.
    /// </summary>
    static void Main(string[] args)
    {
        CleanupFolder();
        Stopwatch watch = new Stopwatch();

        watch.Start();
        SaveOneByOne();
        watch.Stop();
        Console.WriteLine("Saving {0} files serially takes {1} milli-seconds.",
            fileNumber, watch.ElapsedMilliseconds);

        watch.Restart();
        ReadOneByOne();
        watch.Stop();
        Console.WriteLine("Reading {0} files serially takes {1} milli-seconds.",
            fileNumber, watch.ElapsedMilliseconds);

        // Start from an empty folder so the parallel pass writes fresh files.
        CleanupFolder();
        watch.Restart();
        SaveInParallel();
        watch.Stop();
        Console.WriteLine("Saving {0} files in parallel takes {1} milli-seconds.",
            fileNumber, watch.ElapsedMilliseconds);

        watch.Restart();
        ReadInParallel();
        watch.Stop();
        Console.WriteLine("Reading {0} files in parallel takes {1} milli-seconds.",
            fileNumber, watch.ElapsedMilliseconds);

        Console.Read();
    }

    /// <summary>Writes <c>fileNumber</c> XML files sequentially.</summary>
    static void SaveOneByOne()
    {
        for (int i = 0; i < fileNumber; i++)
        {
            XDocument xfile = GetXDocument(i.ToString(), fakeValue);
            xfile.Save(path + i.ToString() + ".xml");
        }
    }

    /// <summary>Loads <c>fileNumber</c> XML files sequentially.</summary>
    static void ReadOneByOne()
    {
        // The list only keeps the loaded documents alive for the duration
        // of the benchmark; its contents are never inspected.
        List<XDocument> files = new List<XDocument>();
        for (int i = 0; i < fileNumber; i++)
        {
            XDocument xfile = XDocument.Load(path + i.ToString() + ".xml");
            files.Add(xfile);
        }
    }

    /// <summary>Writes <c>fileNumber</c> XML files using Parallel.For.</summary>
    static void SaveInParallel()
    {
        // Each iteration writes its own file, so no shared state is touched.
        Parallel.For(0, fileNumber, i =>
        {
            XDocument xfile = GetXDocument(i.ToString(), fakeValue);
            xfile.Save(path + i.ToString() + ".xml");
        });
    }

    /// <summary>Loads <c>fileNumber</c> XML files using Parallel.For.</summary>
    static void ReadInParallel()
    {
        List<XDocument> files = new List<XDocument>();
        Parallel.For(0, fileNumber, i =>
        {
            XDocument xfile = XDocument.Load(path + i.ToString() + ".xml");
            // BUG FIX: List<T>.Add is not thread-safe; concurrent Adds can
            // corrupt the list or throw. Serialize access with a lock.
            lock (filesLock)
            {
                files.Add(xfile);
            }
        });
    }

    /// <summary>
    /// Ensures the test folder exists and is empty before a benchmark pass.
    /// </summary>
    static void CleanupFolder()
    {
        if (!Directory.Exists(path))
        {
            Directory.CreateDirectory(path);
        }
        else
        {
            // Recursive delete + recreate is simpler than deleting files
            // one by one.
            Directory.Delete(path, true);
            Directory.CreateDirectory(path);
        }
    }

    /// <summary>
    /// Builds the small XML document written to each test file:
    /// &lt;Items&gt;&lt;Item&gt;&lt;Key/&gt;&lt;Value/&gt;&lt;/Item&gt;&lt;/Items&gt;.
    /// </summary>
    /// <param name="key">Text stored in the Key element.</param>
    /// <param name="value">Text stored in the Value element.</param>
    public static XDocument GetXDocument(string key, string value)
    {
        return new XDocument(new XDeclaration("1.0", "utf-8", "yes"),
            new XElement("Items",
                new XElement("Item",
                    new XElement("Key", key),
                    new XElement("Value", value))));
    }

    /// <summary>
    /// Generates a random lowercase-alphanumeric string of the given length.
    /// </summary>
    /// <param name="length">Number of characters to generate.</param>
    public static string GetFakeValue(int length)
    {
        // Presize the builder: the final length is known up front.
        StringBuilder sb = new StringBuilder(length);
        string values = "abcdefghijklmnopqrstuvwxyz0123456789";
        Random rand = new Random();
        for (int i = 0; i < length; i++)
        {
            // BUG FIX: rand.Next(0, 35) has an exclusive upper bound, so the
            // last character ('9') could never be chosen and index 35 was
            // unreachable. Use the full alphabet.
            sb.Append(values[rand.Next(values.Length)]);
        }
        return sb.ToString();
    }
}
Result:
Saving 10000 files serially takes 29528 milli-seconds.
Reading 10000 files serially takes 2389 milli-seconds.
Saving 10000 files in parallel takes 8072 milli-seconds.
Reading 10000 files in parallel takes 386 milli-seconds.
The results vary slightly from run to run, but they are consistent overall.