Tuesday, May 3, 2011

ReaderWriterLockSlim Extension Method Performance

I've been playing with collections and threading and came across the nifty extension methods people have created to ease the use of ReaderWriterLockSlim by allowing the IDisposable pattern.

However, I believe I have come to realize that something in the implementation is a performance killer. I realize that extension methods are not supposed to really impact performance, so I am left assuming that something in the implementation is the cause... the amount of Disposable structs created/collected?

Here's some test code:

using System;
using System.Collections.Generic;
using System.Threading;
using System.Diagnostics;

namespace LockPlay {

    static class RWLSExtension {
        struct Disposable : IDisposable {
            readonly Action _action;
            public Disposable(Action action) {
                _action = action;
            }
            public void Dispose() {
                _action();
            }
        } // end struct
        public static IDisposable ReadLock(this ReaderWriterLockSlim rwls) {
            rwls.EnterReadLock();
            return new Disposable(rwls.ExitReadLock);
        }
        public static IDisposable UpgradableReadLock(this ReaderWriterLockSlim rwls) {
            rwls.EnterUpgradeableReadLock();
            return new Disposable(rwls.ExitUpgradeableReadLock);
        }
        public static IDisposable WriteLock(this ReaderWriterLockSlim rwls) {
            rwls.EnterWriteLock();
            return new Disposable(rwls.ExitWriteLock);
        }
    } // end class

    class Program {

        class MonitorList<T> : List<T>, IList<T> {
            object _syncLock = new object();
            public MonitorList(IEnumerable<T> collection) : base(collection) { }
            T IList<T>.this[int index] {
                get {
                    lock(_syncLock)
                        return base[index];
                }
                set {
                    lock(_syncLock)
                        base[index] = value;
                }
            }
        } // end class

        class RWLSList<T> : List<T>, IList<T> {
            ReaderWriterLockSlim _rwls = new ReaderWriterLockSlim();
            public RWLSList(IEnumerable<T> collection) : base(collection) { }
            T IList<T>.this[int index] {
                get {
                    try {
                        _rwls.EnterReadLock();
                        return base[index];
                    } finally {
                        _rwls.ExitReadLock();
                    }
                }
                set {
                    try {
                        _rwls.EnterWriteLock();
                        base[index] = value;
                    } finally {
                        _rwls.ExitWriteLock();
                    }
                }
            }
        } // end class

        class RWLSExtList<T> : List<T>, IList<T> {
            ReaderWriterLockSlim _rwls = new ReaderWriterLockSlim();
            public RWLSExtList(IEnumerable<T> collection) : base(collection) { }
            T IList<T>.this[int index] {
                get {
                    using(_rwls.ReadLock())
                        return base[index];
                }
                set {
                    using(_rwls.WriteLock())
                        base[index] = value;
                }
            }
        } // end class

        static void Main(string[] args) {
            const int ITERATIONS = 100;
            const int WORK = 10000;
            const int WRITE_THREADS = 4;
            const int READ_THREADS = WRITE_THREADS * 3;

            // create data - first List is for comparison only... not thread safe
            int[] copy = new int[WORK];
            IList<int>[] l = { new List<int>(copy), new MonitorList<int>(copy), new RWLSList<int>(copy), new RWLSExtList<int>(copy) };

            // test each list
            Thread[] writeThreads = new Thread[WRITE_THREADS];
            Thread[] readThreads = new Thread[READ_THREADS];
            foreach(var list in l) {
                Stopwatch sw = Stopwatch.StartNew();
                for(int k=0; k < ITERATIONS; k++) {
                    for(int i = 0; i < writeThreads.Length; i++) {
                        writeThreads[i] = new Thread(p => {
                            IList<int> il = p as IList<int>;
                            int c = il.Count;
                            for(int j = 0; j < c; j++) {
                                il[j] = j;
                            }
                        });
                        writeThreads[i].Start(list);
                    }
                    for(int i = 0; i < readThreads.Length; i++) {
                        readThreads[i] = new Thread(p => {
                            IList<int> il = p as IList<int>;
                            int c = il.Count;
                            for(int j = 0; j < c; j++) {
                                int temp = il[j];
                            }
                        });
                        readThreads[i].Start(list);
                    }
                    for(int i = 0; i < readThreads.Length; i++)
                        readThreads[i].Join();
                    for(int i = 0; i < writeThreads.Length; i++)
                        writeThreads[i].Join();
                };
                sw.Stop();
                Console.WriteLine("time: {0} class: {1}", sw.Elapsed, list.GetType());
            }
            Console.WriteLine("DONE");
            Console.ReadLine();
        }
    } // end class
} // end namespace

Here's a typical result:

time: 00:00:03.0965242 class: System.Collections.Generic.List`1[System.Int32]
time: 00:00:11.9194573 class: LockPlay.Program+MonitorList`1[System.Int32]
time: 00:00:08.9510258 class: LockPlay.Program+RWLSList`1[System.Int32]
time: 00:00:16.9888435 class: LockPlay.Program+RWLSExtList`1[System.Int32]
DONE

As you can see, using the extensions actually makes the performance WORSE than just using lock (monitor).

From stackoverflow
  • Looks like its the price of instantiating millions of structs and the extra bit of invocations.

    I would go as far as to say that the ReaderWriterLockSlim is being misused in this sample, a lock is good enough in this case and the performance edge you get with the ReaderWriterLockSlim is negligible compared to the price of explaining these concepts to junior devs.

    You get a huge advantage with reader writer style locks when it takes a non-negligable amount of time to perform reads and writes. The boost will be biggest when you have a predominantly read based system.

    Try inserting a Thread.Sleep(1) while the locks are acquired to see how huge a difference it makes.

    See this benchmark:

    Time for Test.SynchronizedList`1[System.Int32] Time Elapsed 12310 ms
    Time for Test.ReaderWriterLockedList`1[System.Int32] Time Elapsed 547 ms
    Time for Test.ManualReaderWriterLockedList`1[System.Int32] Time Elapsed 566 ms
    

    In my benchmarking I do not really notice much of a difference between the two styles, I would feel comfortable using it provided it had some finalizer protection in case people forget to dispose ....

    using System.Threading;
    using System.Diagnostics;
    using System.Collections.Generic;
    using System;
    using System.Linq;
    
    namespace Test {
    
        static class RWLSExtension {
            struct Disposable : IDisposable {
                readonly Action _action;
                public Disposable(Action action) {
                    _action = action;
                }
                public void Dispose() {
                    _action();
                }
            }
    
            public static IDisposable ReadLock(this ReaderWriterLockSlim rwls) {
                rwls.EnterReadLock();
                return new Disposable(rwls.ExitReadLock);
            }
            public static IDisposable UpgradableReadLock(this ReaderWriterLockSlim rwls) {
                rwls.EnterUpgradeableReadLock();
                return new Disposable(rwls.ExitUpgradeableReadLock);
            }
            public static IDisposable WriteLock(this ReaderWriterLockSlim rwls) {
                rwls.EnterWriteLock();
                return new Disposable(rwls.ExitWriteLock);
            }
        }
    
        class SlowList<T> {
    
            List<T> baseList = new List<T>();
    
            public void AddRange(IEnumerable<T> items) {
                baseList.AddRange(items);
            }
    
            public virtual T this[int index] {
                get {
                    Thread.Sleep(1);
                    return baseList[index];
                }
                set {
                    baseList[index] = value;
                    Thread.Sleep(1);
                }
            }
        }
    
        class SynchronizedList<T> : SlowList<T> {
    
            object sync = new object();
    
            public override T this[int index] {
                get {
                    lock (sync) {
                        return base[index];
                    }
    
                }
                set {
                    lock (sync) {
                        base[index] = value;
                    }
                }
            }
        }
    
    
        class ManualReaderWriterLockedList<T> : SlowList<T> {
    
            ReaderWriterLockSlim slimLock = new ReaderWriterLockSlim();
    
            public override T this[int index] {
                get {
                    T item;
                    try {
                        slimLock.EnterReadLock();
                        item = base[index];
                    } finally {
                        slimLock.ExitReadLock();
                    }
                    return item;
    
                }
                set {
                    try {
                        slimLock.EnterWriteLock();
                        base[index] = value;
                    } finally {
                        slimLock.ExitWriteLock();
                    }
                }
            }
        }
    
        class ReaderWriterLockedList<T> : SlowList<T> {
    
            ReaderWriterLockSlim slimLock = new ReaderWriterLockSlim();
    
            public override T this[int index] {
                get {
                    using (slimLock.ReadLock()) {
                        return base[index];
                    }
                }
                set {
                    using (slimLock.WriteLock()) {
                        base[index] = value;
                    }
                }
            }
        }
    
    
        class Program {
    
    
            private static void Repeat(int times, int asyncThreads, Action action) {
                if (asyncThreads > 0) {
    
                    var threads = new List<Thread>();
    
                    for (int i = 0; i < asyncThreads; i++) {
    
                        int iterations = times / asyncThreads;
                        if (i == 0) {
                            iterations += times % asyncThreads;
                        }
    
                        Thread thread = new Thread(new ThreadStart(() => Repeat(iterations, 0, action)));
                        thread.Start();
                        threads.Add(thread);
                    }
    
                    foreach (var thread in threads) {
                        thread.Join();
                    }
    
                } else {
                    for (int i = 0; i < times; i++) {
                        action();
                    }
                }
            }
    
            static void TimeAction(string description, Action func) {
                var watch = new Stopwatch();
                watch.Start();
                func();
                watch.Stop();
                Console.Write(description);
                Console.WriteLine(" Time Elapsed {0} ms", watch.ElapsedMilliseconds);
            }
    
            static void Main(string[] args) {
    
                int threadCount = 40;
                int iterations = 200;
                int readToWriteRatio = 60;
    
                var baseList = Enumerable.Range(0, 10000).ToList();
    
                List<SlowList<int>> lists = new List<SlowList<int>>() {
                    new SynchronizedList<int>() ,
                    new ReaderWriterLockedList<int>(),
                    new ManualReaderWriterLockedList<int>()
                };
    
                foreach (var list in lists) {
                    list.AddRange(baseList);
                }
    
    
                foreach (var list in lists) {
                    TimeAction("Time for " + list.GetType().ToString(), () =>
                    {
                        Repeat(iterations, threadCount, () =>
                        {
                            list[100] = 99;
                            for (int i = 0; i < readToWriteRatio; i++) {
                                int ignore = list[i];
                            }
                        });
                    });
                }
    
    
    
                Console.WriteLine("DONE");
                Console.ReadLine();
            }
        }
    }
    
    wekempf : Forgetting to call Dispose is identical to forgetting to unlock, and in that case delayed and non-deterministic finalization is highly unlikely to help prevent the ensuing dead locks. Worse, it's possible (probable) that it would make the dead locks intermittent and nearly impossible to debug. Leave the finalizer out and ensure we get a debuggable deadlock.
    Sam Saffron : @wekempf see: http://gist.github.com/104477 side effect is that disposable needs to be a class for this to work at least in DEBUG
    wekempf : @sambo99: I can see the reasoning there, but still don't agree. At best you've turned dead lock into dead lock + eventual assert, with no added benefit (i.e. it's no easier to debug). All for a mistake that seems HIGHLY unlikely to occur to me (especially if you either change the names of the methods or make them non-extension methods).
    Sam Saffron : @wekempf see: http://gist.github.com/104477 MUCH easier when it comes to debugging, this object is so generic it would quickly float into a framework, then its likely someone somewhere will use it and forget about it, you want to fail as early as possible with multithreaded bugs, this fails earliest.
    Sean Newton : @sambo99: ok, if I understand the differences correctly, my original example compared to yours differs hugely in the amount of write activity that is occuring AND the time spent doing the read and write both. I did the latter on purpose, but I didn't really give much thought to the overall read/write ratio. Do you really believe 60x to be reasonable? I guess I assumed 3-5x.
    Sam Saffron : @sean, it really depends on the application, many applications will spend significantly longer than 1ms in a read lock. I think the key to all this is that you must be spending time doing reads for a readerwriter lock to be worth using
    Sean Newton : @sambo99: I think I got this through my head now. There is a performance hit due to the struct overhead that is noticeable in my initial example due to the threads not doing much... NOT a scenario that is a good solution for read/write locks. When such locks *are* appropriate, the hit is negligible. Been playing around with various scenarios, thanks for pointing me in the right direction.
    280Z28 : I marked this down because it didn't identify and fix the specific problems the extension methods introduce here.
  • My guess (you would need to profile to verify) is that the performance drop isn't from creating the Disposable instances (they should be fairly cheap, being structs). Instead I expect it's from creating the Action delegates. You could try changing the implementation of your Disposable struct to store the instance of ReaderWriterLockSlim instead of creating an Action delegate.

    Edit: @280Z28's post confirms that it's the heap allocation of Action delegates that's causing the slowdown.

  • The code appears to use a struct to avoid object creation overhead, but doesn't take the other necessary steps to keep this lightweight. I believe it boxes the return value from ReadLock, and if so negates the entire advantage of the struct. This should fix all the issues and perform just as well as not going through the IDisposable interface.

    Edit: Benchmarks demanded. These results are normalized so the manual method (call Enter/ExitReadLock and Enter/ExitWriteLock inline with the protected code) have a time value of 1.00. The original method is slow because it allocates objects on the heap that the manual method does not. I fixed this problem, and in release mode even the extension method call overhead goes away leaving it identically as fast as the manual method.

    Debug Build:

    Manual:              1.00
    Original Extensions: 1.62
    My Extensions:       1.24
    

    Release Build:

    Manual:              1.00
    Original Extensions: 1.51
    My Extensions:       1.00
    

    My code:

    internal static class RWLSExtension
    {
        public static ReadLockHelper ReadLock(this ReaderWriterLockSlim readerWriterLock)
        {
            return new ReadLockHelper(readerWriterLock);
        }
    
        public static UpgradeableReadLockHelper UpgradableReadLock(this ReaderWriterLockSlim readerWriterLock)
        {
            return new UpgradeableReadLockHelper(readerWriterLock);
        }
    
        public static WriteLockHelper WriteLock(this ReaderWriterLockSlim readerWriterLock)
        {
            return new WriteLockHelper(readerWriterLock);
        }
    
        public struct ReadLockHelper : IDisposable
        {
            private readonly ReaderWriterLockSlim readerWriterLock;
    
            public ReadLockHelper(ReaderWriterLockSlim readerWriterLock)
            {
                readerWriterLock.EnterReadLock();
                this.readerWriterLock = readerWriterLock;
            }
    
            public void Dispose()
            {
                this.readerWriterLock.ExitReadLock();
            }
        }
    
        public struct UpgradeableReadLockHelper : IDisposable
        {
            private readonly ReaderWriterLockSlim readerWriterLock;
    
            public UpgradeableReadLockHelper(ReaderWriterLockSlim readerWriterLock)
            {
                readerWriterLock.EnterUpgradeableReadLock();
                this.readerWriterLock = readerWriterLock;
            }
    
            public void Dispose()
            {
                this.readerWriterLock.ExitUpgradeableReadLock();
            }
        }
    
        public struct WriteLockHelper : IDisposable
        {
            private readonly ReaderWriterLockSlim readerWriterLock;
    
            public WriteLockHelper(ReaderWriterLockSlim readerWriterLock)
            {
                readerWriterLock.EnterWriteLock();
                this.readerWriterLock = readerWriterLock;
            }
    
            public void Dispose()
            {
                this.readerWriterLock.ExitWriteLock();
            }
        }
    }
    
    Sam Saffron : You didn't even profile your code, nor did you explain why its slow
    280Z28 : My code doesn't create any new heap-allocated objects, so even if the time is identical on a short benchmark, this method places less stress on the memory manager later.

0 comments:

Post a Comment