initial commit
This commit is contained in:
parent
5d4e05af94
commit
e4646c4b62
17
README.md
17
README.md
@ -1,2 +1,19 @@
|
|||||||
# benfords-law
|
# benfords-law
|
||||||
|
|
||||||
|
This was a test to determine whether random numbers follow [Benford's Law](https://en.wikipedia.org/wiki/Benford%27s_law). I suspect the law has more to do with the distributions that real-life data is collected from than with truly random numbers. If this is true, it may be why the law works for fraud detection.
|
||||||
|
|
||||||
|
### Results
|
||||||
|
|
||||||
|
With one hundred million samples of random integers between 0 and 9,999,999,999,999,999, the count of each leading digit was the following:
|
||||||
|
|
||||||
|
1: 11105630
|
||||||
|
2: 11110535
|
||||||
|
3: 11112084
|
||||||
|
4: 11113667
|
||||||
|
5: 11120216
|
||||||
|
6: 11106549
|
||||||
|
7: 11108623
|
||||||
|
8: 11114813
|
||||||
|
9: 11107883
|
||||||
|
|
||||||
|
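This shows that uniformly random numbers do not follow Benford's law: every leading digit appears about equally often (roughly 11.1% of the samples each). Benford's law only works when the data is not uniformly random, such as natural data gathered in real life. This is likely because natural data tends to be generated following a power law, which is common in nature.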
|
||||||
|
73
main.go
Normal file
73
main.go
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"math"
|
||||||
|
"math/rand"
|
||||||
|
"os"
|
||||||
|
"runtime"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Parameters of the experiment.
const (
	// randomMin is the smallest value a sample may take (inclusive).
	randomMin = 0

	// randomMax is the largest value a sample may take (inclusive):
	// sixteen nines, the largest 16-digit decimal number.
	randomMax = 9_999_999_999_999_999

	// numSamples is how many random numbers are drawn and tallied.
	numSamples = 10_000_000
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
|
||||||
|
results := [9]int{} // There are 9 possible leading digits and 0 does not count, offset by 1 for index to actual value. Examples: To access 1 use [0]. To access 5 use [4]. To access 9 use [8].
|
||||||
|
currentSample := 0
|
||||||
|
|
||||||
|
statusTicker := time.NewTicker(time.Second)
|
||||||
|
go func() {
|
||||||
|
for {
|
||||||
|
<-statusTicker.C
|
||||||
|
percentCompleted := (currentSample * 100) / numSamples
|
||||||
|
log.Printf("%d %% completed generating and analyzing samples", percentCompleted)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
log.Printf("generating numbers...")
|
||||||
|
|
||||||
|
rand.Seed(time.Now().UnixNano())
|
||||||
|
generatedNumbers := make(chan int, 1024)
|
||||||
|
for i := 0; i < runtime.NumCPU(); i++ {
|
||||||
|
go generatorWorker(generatedNumbers)
|
||||||
|
}
|
||||||
|
|
||||||
|
for currentSample = 0; currentSample < numSamples; currentSample++ {
|
||||||
|
results[firstDigit(<-generatedNumbers)-1]++
|
||||||
|
}
|
||||||
|
|
||||||
|
statusTicker.Stop()
|
||||||
|
log.Printf("done.")
|
||||||
|
|
||||||
|
// output results
|
||||||
|
for digitMinusOne, count := range results {
|
||||||
|
fmt.Printf("%d: %d\n", digitMinusOne+1, count)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Print("Press 'Enter' to continue...")
|
||||||
|
bufio.NewReader(os.Stdin).ReadBytes('\n')
|
||||||
|
}
|
||||||
|
|
||||||
|
func generatorWorker(returnChannel chan int) {
|
||||||
|
for {
|
||||||
|
returnChannel <- rand.Intn(randomMax-randomMin+1) + randomMin // We must use Intn instead of Int because from Base10's perspective, integers cut off at a really weird spot
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// firstDigit returns the leading (most significant) decimal digit of x,
// ignoring the sign. The result is in 1..9 for any non-zero x and 0 when x
// is 0.
//
// Only integer arithmetic is used. Converting to float64 loses precision
// above 2^53 (9999999999999999 becomes 1e16), which would report the wrong
// digit for the largest samples.
func firstDigit(x int) int {
	// Work on the unsigned magnitude so that negating the most negative int
	// cannot overflow.
	magnitude := uint64(x)
	if x < 0 {
		magnitude = -magnitude
	}

	// Strip trailing digits until only the leading one is left.
	for magnitude >= 10 {
		magnitude /= 10
	}
	return int(magnitude)
}
|
||||||
|
|
||||||
|
// numDigits returns how many decimal digits are needed to write x, ignoring
// the sign. Zero is reported as having 1 digit.
func numDigits(x int) int {
	if x == 0 {
		return 1
	}

	// Work on the unsigned magnitude so that negating the most negative int
	// cannot overflow.
	magnitude := uint64(x)
	if x < 0 {
		magnitude = -magnitude
	}

	// Log10 on a float64 is only an estimate: the conversion rounds values
	// above 2^53 and the logarithm itself is rounded, so the result can be
	// off by one near powers of ten (for example 9999999999999999 converts
	// to 1e16). The estimate is therefore corrected below with exact integer
	// comparisons.
	digits := int(math.Log10(float64(magnitude))) + 1

	// power = 10^(digits-1), the smallest number that has `digits` digits.
	power := uint64(1)
	for i := 1; i < digits; i++ {
		power *= 10
	}

	// Estimate too high: magnitude is smaller than the smallest number with
	// that many digits.
	for magnitude < power {
		power /= 10
		digits--
	}
	// Estimate too low: magnitude has at least one more digit. Comparing
	// against magnitude/10 avoids overflowing power*10.
	for magnitude/10 >= power {
		power *= 10
		digits++
	}
	return digits
}
|
Loading…
Reference in New Issue
Block a user