synchronization demo

not_a_commie · Sep 3, 2009

I put together the code below for some training at work. It contains
numerous methods for doing thread synchronization. Compile it in
release mode to see the various speeds. Change the functions in the
two "for" loops at the bottom (but keep them both the same function).
You'll need a multi-core CPU to see any problem with the non-protected
increment as the CLR is not dumb enough to break up an increment
function when scheduling threads on the same core. I never did figure
out how the Monitor class itself does locking. It may be using some
kind of dictionary / data structure that doesn't require locking for
the TryGetValue call. Have any of you seen a data collection structure
with a "TryGetValueOrAddNew" method that is natively thread safe?

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;

namespace DemoInc
{
class Program
{
#region Locker

struct Locker : IDisposable
{
readonly object _object;
public Locker(object obj)
{
Monitor.Enter(obj);
_object = obj;
// optional: log something here or add a timeout constructor
}

public void Dispose()
{
Monitor.Exit(_object);
}
}

#endregion

#region Bran's Sempahore and Mutex

public class BransSemaphore : IDisposable
{
protected volatile int _currentCount;
protected readonly int _maxCount;
protected readonly ManualResetEvent _acquire = new ManualResetEvent
(false); // these things are darn slow (they're made to be system-wide
objects)
protected readonly ManualResetEvent _release = new ManualResetEvent
(false);
protected readonly object _lock = new object();
public BransSemaphore(int currentCount, int maxCount)
{
if (currentCount < 0)
throw new ArgumentOutOfRangeException("currentCount");
if (maxCount < 1)
throw new ArgumentOutOfRangeException("maxCount");

_currentCount = currentCount;
_maxCount = maxCount;
}
public void Acquire()
{
do
{
lock (_lock)
{
_release.Reset();
if (_currentCount > 0)
{
_currentCount--;
_acquire.Set();
return;
}
}
_release.WaitOne();
} while (true);
}
public void Release()
{
do
{
lock (_lock)
{
_acquire.Reset();
if (_currentCount < _maxCount)
{
_currentCount++;
_release.Set();
return;
}
_acquire.WaitOne();
}
} while (true);
}

public void Dispose()
{
_acquire.Close();
_release.Close();
}
}

public class BransMutex : BransSemaphore
{
public BransMutex() : base(1, 1) { }
}

public class BransSpinSemaphore
{
protected volatile int _currentCount;
protected readonly int _maxCount;
protected readonly object _lock = new object();
public BransSpinSemaphore(int currentCount, int maxCount)
{
if (currentCount < 0)
throw new ArgumentOutOfRangeException("currentCount");
if (maxCount < 1)
throw new ArgumentOutOfRangeException("maxCount");

_currentCount = currentCount;
_maxCount = maxCount;
}
public void Acquire()
{
do
{
lock (_lock)
{
if (_currentCount > 0)
{
_currentCount--;
return;
}
}
Thread.Sleep(0);
} while (true);
}
public void Release()
{
do
{
lock (_lock)
{
if (_currentCount < _maxCount)
{
_currentCount++;
return;
}
}
Thread.Sleep(0);
} while (true);
}
}

public class BransSpinMutex : BransSpinSemaphore
{
public BransSpinMutex() : base(1, 1) { }
}

#endregion

#region Bran's Monitor

static class BransMonitor
{
private static readonly object _lock = new object();
private static readonly Dictionary<object, int> _currentRunner =
new Dictionary<object, int>();
public static void Enter(object obj)
{
//look for obj in Dict
//if there compare threadId
// if equal or missing return
//else wait for missing
int threadId = Thread.CurrentThread.ManagedThreadId;
do
{
lock (_lock) // I have no idea how the built-in Monitor class
does their locking -- it must be specially built into the CLR
{
int current;
if (_currentRunner.TryGetValue(obj, out current))
{
if (current == threadId)
return; // it's the same thread; let them go
}
else
{
// nobody is in there
_currentRunner.Add(obj, threadId);
return;
}

// we need to wait because a different thread is running
}
Thread.Sleep(0); // pseudo spin lock
} while (true);
}

public static void Exit(object obj)
{
int threadId = Thread.CurrentThread.ManagedThreadId;
do
{
lock (_lock) // I have no idea how the built-in Monitor class
does their locking -- it must be specially built into the CLR
{
int current;
if (_currentRunner.TryGetValue(obj, out current))
{
if (current == threadId)
{
_currentRunner.Remove(obj);
return; // it's the same thread; release it
}
}
else
{
// nobody is in there
// ignore it
return;
}

// we need to wait because a different thread has the lock
}
Thread.Sleep(0); // pseudo spin lock
} while (true);
}
}

#endregion

#region Increment Demonstration Class

class Incer : IDisposable
{
private int _count;
public void Inc() // completely unsafe
{
_count++;
}

private readonly object _lock = new object(); // static or not?
public void IncLock() // the traditional method
{
lock (_lock)
_count++;
}

public void IncMonitor() // equivalent to the lock keyword
{
Monitor.Enter(_lock);
try
{
_count++;
}
finally
{
Monitor.Exit(_lock);
}
}

public void IncLocker() // same as the lock keyword; this just
shows how it works
{
using (new Locker(_lock)) // this does allocate one additional
thing on the stack (it's a struct)
_count++;
}

public void IncInterlocked()
{
Interlocked.Increment(ref _count); // the preferred method
wherever possible
}

private int _exch = 1; // 1 means allowed in
public void IncExchange() // this demos a spin lock if we change
the sleep to be a NOP
{
while (Interlocked.Exchange(ref _exch, 0) <= 0) Thread.Sleep(0);
_count++;
Interlocked.Exchange(ref _exch, 1);
}

private readonly Mutex _mutex = new Mutex(); // same as Semaphore
(1,1) and incredibly slow
public void IncMutex()
{
_mutex.WaitOne(); // this, as in all things being disposed in the
finally clause, should be outside the try
try
{
_count++;
}
finally // no performance affect
{
_mutex.ReleaseMutex();
}
}

private readonly Semaphore _sem = new Semaphore(1, 1); // really
slow
public void IncSemaphore()
{
_sem.WaitOne();
try
{
_count++;
}
finally
{
_sem.Release();
}
}

private readonly BransMutex _bm = new BransMutex(); // same as
Semaphore(1,1)
public void IncBransMutex()
{
_bm.Acquire(); // this, as in all things being disposed in the
finally clause, should be outside the try
try
{
_count++;
}
finally // no performance affect
{
_bm.Release();
}
}

private readonly BransSemaphore _bs = new BransSemaphore(1, 1); //
system wide semaphore; she's expensive
public void IncBransSemaphore()
{
_bs.Acquire();
try
{
_count++;
}
finally
{
_bs.Release();
}
}

private readonly BransSpinMutex _bsm = new BransSpinMutex(); //
same as Semaphore(1,1)
public void IncBransSpinMutex()
{
_bsm.Acquire(); // this, as in all things being disposed in the
finally clause, should be outside the try
try
{
_count++;
}
finally // no performance affect
{
_bsm.Release();
}
}

private readonly BransSpinSemaphore _bss = new BransSpinSemaphore
(1, 1); // system wide semaphore; she's expensive
public void IncBransSpinSemaphore()
{
_bss.Acquire();
try
{
_count++;
}
finally
{
_bss.Release();
}
}

public void IncBransMonitor()
{
BransMonitor.Enter(_lock);
try
{
_count++;
}
finally // no performance affect
{
BransMonitor.Exit(_lock);
}
}

public int Count { get { return _count; } }

public void Dispose()
{
_mutex.Close();
_sem.Close();
_bm.Dispose();
_bs.Dispose();
}
}

#endregion

#region Main Method

[STAThread]
static void Main(string[] args)
{
var incer = new Incer();
var sw1 = new Stopwatch();
var sw2 = new Stopwatch();

var thread1 = new Thread(() =>
{
sw1.Start(); for (int i = 0; i < 5000000; i++) incer.IncInterlocked
(); sw1.Stop();
});
var thread2 = new Thread(() =>
{
sw2.Start(); for (int i = 0; i < 5000000; i++) incer.IncInterlocked
(); sw2.Stop();
});

thread1.Name = "t1";
thread1.SetApartmentState(ApartmentState.STA);
thread2.Name = "t2";
thread2.SetApartmentState(ApartmentState.STA);
thread1.Start();
thread2.Start();

thread1.Join();
thread2.Join();
Console.Out.WriteLine("Counted to {0} in {1} sec.", incer.Count,
sw1.Elapsed.TotalSeconds + sw2.Elapsed.TotalSeconds);
incer.Dispose();
}

#endregion
}
}

Peter Duniho · Sep 4, 2009

I put together the code below for some training at work. It contains
numerous methods for doing thread synchronization. Compile it in
release mode to see the various speeds. Change the functions in the
two "for" loops at the bottom (but keep them both the same function).
You'll need a multi-core CPU to see any problem with the non-protected
increment as the CLR is not dumb enough to break up an increment
function when scheduling threads on the same core.

It's not really the CLR so much as the platform itself. I haven't looked,
but I suspect the "_count++" gets JITted to an "inc" instruction. And I'm
pretty sure a context-switch can't happen mid-instruction for a thread,
even if the Windows scheduler is doing the switching. So with one core,
you can't switch from one thread to another until the increment has
finished.

I never did figure
out how the Monitor class itself does locking. It may be using some
kind of dictionary / data structure that doesn't require locking for
the TryGetValue call.

I forget the details, but I'm sure I've seen them before. So if you're
curious, Google should be able to find it for you. Monitor is reasonably
efficient, but it requires some kind of synchronization/locking, just as
any thread-synchronization mechanism would.

Have any of you seen a data collection structure
with a "TryGetValueOrAddNew" method that is natively thread safe?

Define "natively".

As far as your examples, I've got some comments:

-- You go to some lengths to describe various mechanisms as "slow".
But there's not any quantification of that, and the fact is for many
situations "slow" is something that just wouldn't ever be an issue. The
comment about ManualResetEvent being "system-wide" and thus "slow" has two
problems: "system-wide" doesn't in and of itself necessarily imply "slow",
and since all ManualResetEvents are anonymous, I doubt they are
"system-wide" anyway (you need to use EventWaitHandle to get a named
event).

For sure, using phrases like "incredibly slow" is strikingly misleading,
IMHO. And it seems to me that in many of the examples you've provided,
there are performance problems introduced simply via the implementation,
rather than because of the use of any specific .NET types, making any
performance comparison between different approaches invalid.

-- Generally, where you use ManualResetEvent instances, you should be
using the Monitor class notification methods instead: Wait(), Pulse(), etc.

-- Given that you're not doing real spin-waits in the so-called "spin"
implementations, you ought to be using Thread.Sleep(1) to ensure an
unconditional yield, rather than Thread.Sleep(0) (which will yield only to
threads of the same priority or higher).

-- Note that .NET 4 will include "spin"-ning lock implementations; you
might want to consider targeting .NET 4 for your demonstration.

-- I don't get the point of some of the implementations, in that they
add nothing semantically, and essentially delegate (sometimes with added
inefficiencies even beyond the wrapper logic) to the built-in .NET
mechanisms. For example, the Locker struct and BransMonitor class.

-- Speaking of the Locker struct, your comment that "this does
allocate one additional thing on the stack (it's a struct)" is dangerously
false. It's a common misconception that structs are always allocated on
the stack; they are not, and in your code, the Locker struct is boxed and
passed as an object instance to the "using" statement. It never exists on
the stack.

In addition to that basic error in understanding memory usage, there is
another potential pitfall here, which is the boxing of the struct in the
first place. Your code avoids a problem, because the "using" statement is
always working with the originally-boxed value of Locker. But any time
you introduce reference-type-like semantics on a value type (i.e. struct),
you run the risk of operating on the wrong boxed value at some point.
This is of particular concern for dealing with interfaces like IDisposable
and anything that involves locking, which of course are _both_
characteristics of your Locker struct.

Honestly, this pattern for synchronization really isn't very useful
anyway. There's practically no structural difference in the code between
using "using" and using "lock", but the pattern using "using" is
obfuscated (i.e. doesn't make it as clear that some synchronization is
going on), as well as less efficient (after all, inside you basically wind
up doing the same thing as the "lock" statement would anyway). And on top
of all that, once you have a type where its intended use is that it locks
upon construction and then you're supposed to dispose it when you want to
release a lock, you introduce the possibility of a bug where someone
forgets to dispose of it.

-- Finally, a couple of minor comments with respect to your Main()
method: any time you are doing performance measurement, you should execute
a "warm-up" loop (100-500 iterations is fine) before you actually start
measuring; and I don't see the point in setting the COM apartment state
for your threads, since as near as I can tell you're not using any COM
stuff anyway, nor should it really affect performance in any case (the
apartment state is more a code-correctness thing, related to whether the
code is doing its own synchronization or is going to rely on COM to handle
data marshaling for it).

Hope that helps.

Pete

Jeroen Mostert · Sep 4, 2009

not_a_commie said:
I put together the code below for some training at work. It contains
numerous methods for doing thread synchronization. Compile it in
release mode to see the various speeds. Change the functions in the
two "for" loops at the bottom (but keep them both the same function).
You'll need a multi-core CPU to see any problem with the non-protected
increment as the CLR is not dumb enough to break up an increment
function when scheduling threads on the same core. I never did figure
out how the Monitor class itself does locking.

Every .NET object has a (reference to a) sync block, allocated on demand,
that's used for locking. For more details, see
http://msdn.microsoft.com/magazine/cc188793. However, to really understand
how *that* works under the hood, you'd have to know how critical sections
work. If you're not contend to leave it at "they just do", I suggest digging
into "Windows Internals" (4th or 5th edition). And getting out your debugger.

It may be using some kind of dictionary / data structure that doesn't
require locking for the TryGetValue call. Have any of you seen a data
collection structure with a "TryGetValueOrAddNew" method that is natively
thread safe?

I have no idea what you mean by "natively", nor do I care to guess. However,
there's a fascinating field of study called non-blocking synchronization
that may contain the idea you're thinking of.

As for your code, I really don't see the point of timing these classes.
You're mixing multiple lower-level mechanisms which have their own
performance characteristics. For example, the reason ManualResetEvent is so
"expensive" is not because it's "system-wide" but because operations on it
require transitions to unmanaged code, and from there to kernel mode (they
just wrap Win32 events). By contrast, .NET monitors are optimized to avoid
such transitions where possible. If your classes were representative of how
the system primitives are actually implemented it might be insightful, but
this mixing of levels means they're not.

And, it hopefully goes without saying, your classes should not be used in
production code. Unless you're a multithreading expert with a clear need,
there's no reason not to stick to the existing synchronization primitives.
Monitors in particular are a very versatile technique that should cover most
of your needs.

not_a_commie · Sep 4, 2009

Thanks for the feedback! Response inline:

I forget the details, but I'm sure I've seen them before. So if you're
curious, Google should be able to find it for you. Monitor is reasonably
efficient, but it requires some kind of synchronization/locking, just as
any thread-synchronization mechanism would.
Define "natively".

By natively, I mean a non-blocking locking mechanism. Something that
uses atomic operations in a specific order so as to always be thread
safe. For example, the assignment operator is always thread safe.
Perhaps reading and assigning things in the right order I could check
for a value in a list or add to the list if my item doesn't exist.

comment about ManualResetEvent being "system-wide" and thus "slow" has two
problems: "system-wide" doesn't in and of itself necessarily imply "slow",
and since all ManualResetEvents are anonymous, I doubt they are
"system-wide" anyway (you need to use EventWaitHandle to get a named
event).

ManualResetEvent inherits from EventWaitHandle. I assume they both go
all the way into the kernel (as the other poster mentioned) regardless
of whether or not you use a system-wide name.

-- Generally, where you use ManualResetEvent instances, you should be
using the Monitor class notification methods instead: Wait(), Pulse(), etc.

This is interesting. It's not clear to me from the documentation how
to use Monitor.Pulse. Explain to me how you would modify the
*Semaphore classes I posted to use this mechanism?

-- Given that you're not doing real spin-waits in the so- called "spin"
implementations, you ought to be using Thread.Sleep(1) to ensure an
unconditional yield, rather than Thread.Sleep(0) (which will yield only to
threads of the same priority or higher).

It's true, I'm not doing true spin locks. I would need to comment out
the Sleep calls to do true spin locks. That would make for a mess of a
test on a single core machine, though. If you are doing real speed
testing comment those out.

-- I don't get the point of some of the implementations, in that they
add nothing semantically, and essentially delegate (sometimes with added
inefficiencies even beyond the wrapper logic) to the built-in .NET
mechanisms.

The various implementations aren't designed to be useful -- they were
designed to teach people what's going on under the covers.

-- Speaking of the Locker struct, your comment that "this does
allocate one additional thing on the stack (it's a struct)" is dangerously
false. It's a common misconception that structs are always allocated on
the stack; they are not, and in your code, the Locker struct is boxed and
passed as an object instance to the "using" statement. It never existson
the stack.

Thanks for clarifying that. I wasn't aware that the "using" statement
took an input by reference. It makes sense now, though -- similar to
the way the "lock" statement just passes to Monitor.Enter.

Honestly, this pattern for synchronization really isn't very useful
anyway. There's practically no structural difference in the code between
using "using" and using "lock",

There's one significant advantage with the "using" vs. the "lock". The
"using" allows you to easily put in logging for your Monitor.Enter/
Exit calls. It can be useful to debug a deadlock.

Peter Duniho · Sep 4, 2009

[...]

Define "natively".

Click to expand...

By natively, I mean a non-blocking locking mechanism.

Then you should use the phrase "lock-free synchronization", not "natively".

If you want some excellent examples of lock-free code, Joe Duffy's blog is
a good place to look. There's not a specific dictionary implementation
such as what you're asking about, but there's enough there to give you
some ideas if you're really interested.

http://www.bluebytesoftware.com/blog/

A Google search using the phrase "lock-free" will likely turn up other
useful examples too. Probably not a specific dictionary implementation,
but perhaps something close.

Note that lock-free algorithms are generally much more complicated, and do
not necessarily guarantee better performance.

Something that
uses atomic operations in a specific order so as to always be thread
safe. For example, the assignment operator is always thread safe.

No, it's not.

First of all, any atomicity of the assignment operator is highly platform
dependent, and it's trivial to create code in .NET where it's not thread
safe (just make a struct that's larger than 4 bytes long...any assignment
of that struct value isn't thread-safe).

Secondly, atomicity does not necessarily mean "thread-safe". It depends
on the usage of the code. Atomicity can be used to implement thread
safety, but it's possible to have atomic operations without thread safety.

[...]

comment about ManualResetEvent being "system-wide" and thus "slow" has
two Â
problems: "system-wide" doesn't in and of itself necessarily imply
"slow", Â
and since all ManualResetEvents are anonymous, I doubt they are Â
"system-wide" anyway (you need to use EventWaitHandle to get a named Â
event).

Click to expand...

ManualResetEvent inherits from EventWaitHandle. I assume they both go
all the way into the kernel (as the other poster mentioned) regardless
of whether or not you use a system-wide name.

Using the kernel doesn't mean it's "system-wide". If you want to assert
that the type is slow on the basis of the kernel transitions, then say
that rather than making an incorrect statement about "system-wide".

And yes, the kernel transitions slow things down, but that's true for all
sorts of things. A lot of multithreaded code is _full_ of kernel
transitions already, not even counting synchronization needs. The mere
presence of kernel transitions doesn't necessarily mean that the code will
be significantly slower than if you could get rid of a particular source
of kernel transitions. It all depends on whether that particular source
is a bottleneck or not.

This is interesting. It's not clear to me from the documentation how
to use Monitor.Pulse. Explain to me how you would modify the
*Semaphore classes I posted to use this mechanism?

The general issue a semaphore is solving is to allow for client code to
block and wait for a finite resource to become available. That's exactly
what Wait() and Pulse() do. When client code acquires the lock but
discovers the resource is currently at its maximum acquisition count, the
client code calls Wait(); doing so releases the lock and puts the client
into a wait queue. When client code releases its acquisition of the
resource, it calls Pulse(), which will wake up one waiting thread and
allow it to proceed to acquire the count just released.

As an added benefit, the Monitor class's implementation includes an actual
wait queue, so that clients are granted the resource on a FIFO basis. If
fairness of acquisition is important to the algorithm, this is an
important feature (it's not always, but sometimes it is).

It's true, I'm not doing true spin locks. I would need to comment out
the Sleep calls to do true spin locks. That would make for a mess of a
test on a single core machine, though. If you are doing real speed
testing comment those out.

That's not my point. My point is that if you're calling Sleep(), you
don't care about performance measurements or a true implementation of a
spin-wait. So you might as well make your yielding as system-friendly as
possible. The implementation you're showing can starve lower-priority
threads, which is not very system-friendly.

The various implementations aren't designed to be useful -- they were
designed to teach people what's going on under the covers.

I'm sorry, but it's not clear to me how they do that. Most of the
implementations you've shown are not actually indicative of precisely how
the .NET constructs are implemented. In fact, because they simply
delegate to the .NET implementation, they don't really illustrate anything
at all.

It's as if you demonstrated "what's going on under the covers" with a
System.Windows.Forms.TextBox by writing a Control sub-class that simply
keeps an internal TextBox and calls all the TextBox members as the
implementation for its own members.

[...]

Honestly, this pattern for synchronization really isn't very useful Â
anyway. Â There's practically no structural difference in the code
between Â
using "using" and using "lock",

Click to expand...

There's one significant advantage with the "using" vs. the "lock". The
"using" allows you to easily put in logging for your Monitor.Enter/
Exit calls. It can be useful to debug a deadlock.

The most useful way to debug a deadlock is to ensure you can't have one in
the first place. See "lock-leveling", for example.

Also, I don't see how logging lock acquisition and releasing helps debug a
deadlock. When you have deadlock, it's trivial to see in the debugger
what locks are involved; you just break into the code and see where the
code is waiting. That will show you not only what locks have been
acquired, but also which ones are trying to be acquired. What would
logging add to that?

Note also that for the logging to be useful, you have to have some way of
identifying the locking object involved; a plain old "object" isn't going
to provide that. You could use a string instance as your locking object,
and then log the string text, but then your locking just got more
heavyweight, because of the storage needed for the string. And of course,
if you're concerned about performance, logging inside synchronization
implementations is definitely counter-productive.

Personally, I feel that the drawbacks far outweigh any potential benefit.
Efficiency is reduced (not even counting the potential overhead of
logging), you have a greater opportunity for bugs, all without really
improving the debugging scenarios in a significant way.

Pete

not_a_commie · Sep 4, 2009

So I created a "BransPulseSemaphore", but it's not quite as performant
as "BransSpinSemaphore" with the Thread.Sleep calls removed. It's
maybe 50% slower. In other words, spin locks are faster than
recovering from a thread wait call, but we knew that already. Here is
the class:

public class BransPulseSemaphore
{
protected volatile int _currentCount;
protected readonly int _maxCount;
protected readonly object _lock = new object();
public BransPulseSemaphore(int currentCount, int maxCount)
{
if (currentCount < 0)
throw new ArgumentOutOfRangeException("currentCount");
if (maxCount < 1)
throw new ArgumentOutOfRangeException("maxCount");

_currentCount = currentCount;
_maxCount = maxCount;
}
public void Acquire()
{
lock (_lock)
{
do
{
if (_currentCount > 0)
{
_currentCount--;
Monitor.Pulse(_lock);
return;
}
Monitor.Wait(_lock);
} while (true);
}
}

public void Release()
{
lock (_lock)
{
do
{
if (_currentCount < _maxCount)
{
_currentCount++;
Monitor.Pulse(_lock);
return;
}
Monitor.Wait(_lock);
} while (true);
}
}
}

public class BransPulseMutex : BransPulseSemaphore
{
public BransPulseMutex() : base(1, 1) { }
}

Peter Duniho · Sep 5, 2009

So I created a "BransPulseSemaphore", but it's not quite as performant
as "BransSpinSemaphore" with the Thread.Sleep calls removed. It's
maybe 50% slower.

As discussed before, "performant" is not a useful concept for these
examples. If you wanted performance, you wouldn't make your own
semaphores out of existing semaphores. You'd just use the existing
semaphores.

Also, try using spin waits on a single-core machine, or in a more complex
scenario than your degenerate "tight-loop increment" and see what that
does for performance (hint: it's awful).

As far as your specific implementation goes, you might be able to make it
go faster by not waking your threads up unless you know there's a reason
to (e.g. don't call Pulse() if the count is already at zero...there are
other refinements you could make). Or it might not affect performance
much; I haven't bothered to try, because there's really not any point.

Basically, there's not really any useful data regarding performance to be
gleaned from these samples.

Also, the loop in your Release() method is a poor choice, IMHO. If you
get into Release() and the current count is already at the maximum, the
client code has a bug; you should just thrown an exception. By failing
silently, you haven't actually prevented an eventual failure, but you have
deferred it to some other later moment in time, making it much harder to
debug.

Pete

not_a_commie · Sep 7, 2009

As discussed before, "performant" is not a useful concept for these

examples. If you wanted performance, you wouldn't make your own
semaphores out of existing semaphores. You'd just use the existing
semaphores.

That's completely not true. I made the other semaphore and mutex
classes because the built in classes are so slow. If you care anything
about performance and don't need to use a global named mutex or
semaphore, don't use the built-in classes. The wait/pulse semaphore
(updated again below) is 9x faster than the built-in semaphore class.

Also, try using spin waits on a single-core machine, or in a more complex
scenario than your degenerate "tight-loop increment" and see what that
does for performance (hint: it's awful).

Agreed. Don't use the spin locks unless you're doing a DMA transfer.

As far as your specific implementation goes, you might be able to make it
go faster by not waking your threads up unless you know there's a reason
to (e.g. don't call Pulse() if the count is already at zero...there are
other refinements you could make).

I was unable to implement this suggestion. I'm not sure it makes sense
after I changed things for your next suggestion. See the code below.

Also, the loop in your Release() method is a poor choice, IMHO. If you
get into Release() and the current count is already at the maximum, the
client code has a bug; you should just thrown an exception. By failing
silently, you haven't actually prevented an eventual failure

Good suggestion. I incorporated it into the code below. It eliminates
the need for the pulse call in the Acquire method.

public class BransPulseSemaphore
{
protected volatile int _currentCount;
protected readonly int _maxCount;
protected readonly object _lock = new object();
public BransPulseSemaphore(int currentCount, int maxCount)
{
if (currentCount < 0)
throw new ArgumentOutOfRangeException("currentCount");
if (maxCount < 1)
throw new ArgumentOutOfRangeException("maxCount");

_currentCount = currentCount;
_maxCount = maxCount;
}
public void Acquire()
{
lock (_lock)
{
do
{
if (_currentCount > 0)
{
--_currentCount;
return;
}
Monitor.Wait(_lock);
} while (true);
}
}

public void Release()
{
lock (_lock)
{
if (_currentCount < _maxCount)
{
++_currentCount;
Monitor.Pulse(_lock);
return;
}
}
throw new InvalidOperationException();
}
}

about using the Lock synchorization mechanism	5	May 10, 2010
Why do it this way	1	Aug 8, 2004
synchronizations of threads	1	Oct 19, 2008
C# Deque found on the web--thanks to The Game Programming Wiki	9	Aug 8, 2007
Use of event handling	6	Dec 23, 2011
is it any point to have readonly field in this code	1	Nov 4, 2010
Custom Attribute trouble	2	May 24, 2008
a question about binary serialization	3	Apr 21, 2011

synchronization demo

not_a_commie

Peter Duniho

Jeroen Mostert

not_a_commie

Peter Duniho

not_a_commie

Peter Duniho

not_a_commie

Ask a Question

Similar Threads