Anonymizing JSON

BinderScriptNotebook

This tutorial shows how to implement an anonymizer for a JSON document (represented using the JsonValue type discussed in the JSON parser article). This functionality is not directly available in the FSharp.Data package, but it can be implemented very easily by recursively walking over the JSON document.

If you want to use the JSON anonymizer in your code, you can copy the source from GitHub and just include it in your project. If you use these functions often and would like to see them in the FSharp.Data package, please submit a feature request.

DISCLAIMER: Don't use this for sensitive data, as it's just a sample.

open System
open System.Globalization
open FSharp.Data

/// Anonymizes a JSON document by replacing the letters and digits of its
/// values with random characters of the same kind. The document structure,
/// property names, booleans and nulls are left untouched.
///
/// propertiesToSkip: names of record properties whose values are copied
/// through unchanged (nested content included).
/// valuesToSkip: string values that are copied through unchanged.
type JsonAnonymizer(?propertiesToSkip, ?valuesToSkip) =

    let propertiesToSkip = Set.ofList (defaultArg propertiesToSkip [])
    let valuesToSkip = Set.ofList (defaultArg valuesToSkip [])

    let rng = Random()

    let digits = [| '0' .. '9' |]
    let lowerLetters = [| 'a' .. 'z' |]
    let upperLetters = [| 'A' .. 'Z' |]

    // URL scheme prefixes that are kept verbatim in front of a randomized string.
    let urlPrefixes = [ "http://"; "https://" ]

    // Picks a random element; the bound comes from the array itself so the
    // alphabet and the index range cannot drift apart.
    let pick (chars: char[]) = chars.[rng.Next(chars.Length)]

    // Digits map to random digits, letters to random letters of the same
    // case (non-lowercase letters map to upper case); anything else is kept.
    let getRandomChar (c: char) =
        if Char.IsDigit c then
            pick digits
        elif Char.IsLetter c then
            if Char.IsLower c then
                pick lowerLetters
            else
                pick upperLetters
        else
            c

    let randomize (str: string) =
        String(str.ToCharArray() |> Array.map getRandomChar)

    // Randomizes only the digits of a formatted number, and only in front of
    // an exponent marker. Sign, decimal point and the exponent itself stay as
    // they are, so the text is still a number after randomization
    // (e.g. "1E+30" must not lose its 'E').
    let randomizeNumberText (text: string) =
        let mantissaLength =
            match text.IndexOfAny([| 'E'; 'e' |]) with
            | -1 -> text.Length
            | index -> index

        String(
            text.ToCharArray()
            |> Array.mapi (fun i c -> if i < mantissaLength && Char.IsDigit c then pick digits else c)
        )

    // Replaces the first digit with '0'. Used as a fallback when the random
    // digits pushed a number out of the representable range.
    let zeroLeadingDigit (text: string) =
        let chars = text.ToCharArray()

        match chars |> Array.tryFindIndex (fun c -> Char.IsDigit c) with
        | Some index -> chars.[index] <- '0'
        | None -> ()

        String(chars)

    // Formatting and parsing both use the invariant culture so the round trip
    // does not depend on the machine's regional settings.
    let randomizeDecimal (d: decimal) =
        let text = d.ToString(CultureInfo.InvariantCulture) |> randomizeNumberText

        match Decimal.TryParse(text, NumberStyles.Number, CultureInfo.InvariantCulture) with
        | true, value -> value
        | _ -> Decimal.Parse(zeroLeadingDigit text, NumberStyles.Number, CultureInfo.InvariantCulture)

    let randomizeFloat (f: float) =
        let text = f.ToString(CultureInfo.InvariantCulture) |> randomizeNumberText

        // Depending on the runtime an out-of-range value either fails to
        // parse or parses as infinity; both are treated as overflow here.
        match Double.TryParse(text, NumberStyles.Float, CultureInfo.InvariantCulture) with
        | true, value when not (Double.IsInfinity value) -> value
        | _ -> Double.Parse(zeroLeadingDigit text, NumberStyles.Float, CultureInfo.InvariantCulture)

    let isType testType typ =
        match typ with
        | Runtime.StructuralTypes.InferedType.Primitive(primitive, _, _, _) -> primitive = testType
        | _ -> false

    let isBit typ =
        typ |> isType typeof<Runtime.StructuralTypes.Bit0>
        || typ |> isType typeof<Runtime.StructuralTypes.Bit1>

    // Runs FSharp.Data's primitive type inference on the text of a value.
    let inferType (text: string) =
        Runtime.StructuralInference.inferPrimitiveType
            Runtime.StructuralInference.defaultUnitsOfMeasureProvider
            Runtime.StructuralInference.InferenceMode'.ValuesOnly
            CultureInfo.InvariantCulture
            text
            None

    let rec anonymize json =
        match json with
        | JsonValue.String s when valuesToSkip.Contains s -> json
        | JsonValue.String s ->
            let typ = inferType s

            if typ |> isType typeof<Guid> then
                // GUIDs are replaced by a fresh GUID so the value stays well-formed.
                JsonValue.String(Guid.NewGuid().ToString())
            elif isBit typ || typ |> isType typeof<DateTime> then
                // Bit-like and date-like strings are kept as they are.
                json
            else
                // Ordinal comparison: the scheme is a protocol token, not
                // natural-language text.
                let prefix =
                    urlPrefixes
                    |> List.tryFind (fun p -> s.StartsWith(p, StringComparison.Ordinal))
                    |> Option.defaultValue ""

                JsonValue.String(prefix + randomize (s.Substring(prefix.Length)))
        | JsonValue.Number d ->
            if isBit (inferType (d.ToString(CultureInfo.InvariantCulture))) then
                json
            else
                JsonValue.Number(randomizeDecimal d)
        // NaN and infinities contain no digits to randomize.
        | JsonValue.Float f when Double.IsNaN f || Double.IsInfinity f -> json
        | JsonValue.Float f -> JsonValue.Float(randomizeFloat f)
        | JsonValue.Boolean _
        | JsonValue.Null -> json
        | JsonValue.Record props ->
            props
            |> Array.map (fun (key, value) ->
                let newValue =
                    if propertiesToSkip.Contains key then
                        value
                    else
                        anonymize value

                key, newValue)
            |> JsonValue.Record
        | JsonValue.Array array -> array |> Array.map anonymize |> JsonValue.Array

    /// Returns an anonymized copy of the given JSON value.
    member _.Anonymize json = anonymize json

// Load the sample document from the sibling "data" directory. The path is
// built with Path.Combine so a directory separator is always inserted after
// __SOURCE_DIRECTORY__ (plain string concatenation would yield "<dir>../..").
let json =
    JsonValue.Load(System.IO.Path.Combine(__SOURCE_DIRECTORY__, "..", "data", "TwitterStream.json"))

printfn "%O" json

// Anonymize everything except the values of "lang" properties.
let anonymizedJson = (JsonAnonymizer [ "lang" ]).Anonymize json
printfn "%O" anonymizedJson

Related articles

namespace System

namespace System.Globalization

Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp

Multiple items
namespace FSharp.Data

--------------------
namespace Microsoft.FSharp.Data

Multiple items
type JsonAnonymizer = new: ?propertiesToSkip: string list * ?valuesToSkip: string list -> JsonAnonymizer member Anonymize: json: JsonValue -> JsonValue

--------------------
new: ?propertiesToSkip: string list * ?valuesToSkip: string list -> JsonAnonymizer

val propertiesToSkip: string list option

val valuesToSkip: string list option

val propertiesToSkip: Set<string>

Multiple items
module Set from Microsoft.FSharp.Collections

--------------------
type Set<'T (requires comparison)> = interface IReadOnlyCollection<'T> interface IStructuralEquatable interface IComparable interface IEnumerable interface IEnumerable<'T> interface ICollection<'T> new: elements: 'T seq -> Set<'T> member Add: value: 'T -> Set<'T> member Contains: value: 'T -> bool member IsProperSubsetOf: otherSet: Set<'T> -> bool ...

--------------------
new: elements: 'T seq -> Set<'T>

val ofList: elements: 'T list -> Set<'T> (requires comparison)

val defaultArg: arg: 'T option -> defaultValue: 'T -> 'T

val valuesToSkip: Set<string>

val rng: Random

Multiple items
type Random = new: unit -> unit + 1 overload member GetHexString: stringLength: int * ?lowercase: bool -> string + 1 overload member GetItems<'T> : choices: ReadOnlySpan<'T> * length: int -> 'T array + 2 overloads member GetString: choices: ReadOnlySpan<char> * length: int -> string member Next: unit -> int + 2 overloads member NextBytes: buffer: byte array -> unit + 1 overload member NextDouble: unit -> float member NextInt64: unit -> int64 + 2 overloads member NextSingle: unit -> float32 member Shuffle<'T> : values: Span<'T> -> unit + 1 overload ...
<summary>Represents a pseudo-random number generator, which is an algorithm that produces a sequence of numbers that meet certain statistical requirements for randomness.</summary>

--------------------
Random() : Random
Random(Seed: int) : Random

val digits: char array

val lowerLetters: char array

val upperLetters: char array

val getRandomChar: c: char -> char

val c: char

Multiple items
val char: value: 'T -> char (requires member op_Explicit)

--------------------
type char = Char

type Char = member CompareTo: value: char -> int + 1 overload member Equals: obj: char -> bool + 1 overload member GetHashCode: unit -> int member GetTypeCode: unit -> TypeCode member ToString: unit -> string + 2 overloads static member ConvertFromUtf32: utf32: int -> string static member ConvertToUtf32: highSurrogate: char * lowSurrogate: char -> int + 1 overload static member GetNumericValue: c: char -> float + 1 overload static member GetUnicodeCategory: c: char -> UnicodeCategory + 1 overload static member IsAscii: c: char -> bool ...
<summary>Represents a character as a UTF-16 code unit.</summary>

Char.IsDigit(c: char) : bool
Char.IsDigit(s: string, index: int) : bool

Random.Next() : int
Random.Next(maxValue: int) : int
Random.Next(minValue: int, maxValue: int) : int

Char.IsLetter(c: char) : bool
Char.IsLetter(s: string, index: int) : bool

Char.IsLower(c: char) : bool
Char.IsLower(s: string, index: int) : bool

val randomize: str: string -> String

val str: string

Multiple items
val string: value: 'T -> string

--------------------
type string = String

Multiple items
type String = interface seq<char> interface IEnumerable interface ICloneable interface IComparable interface IComparable<string> interface IConvertible interface IEquatable<string> interface IParsable<string> interface ISpanParsable<string> new: value: nativeptr<char> -> unit + 8 overloads ...
<summary>Represents text as a sequence of UTF-16 code units.</summary>

--------------------
String(value: nativeptr<char>) : String
String(value: char array) : String
String(value: ReadOnlySpan<char>) : String
String(value: nativeptr<sbyte>) : String
String(c: char, count: int) : String
String(value: nativeptr<char>, startIndex: int, length: int) : String
String(value: char array, startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : String

String.ToCharArray() : char array
String.ToCharArray(startIndex: int, length: int) : char array

type Array = interface ICollection interface IEnumerable interface IList interface IStructuralComparable interface IStructuralEquatable interface ICloneable member Clone: unit -> obj member CopyTo: array: Array * index: int -> unit + 1 overload member GetEnumerator: unit -> IEnumerator member GetLength: dimension: int -> int ...
<summary>Provides methods for creating, manipulating, searching, and sorting arrays, thereby serving as the base class for all arrays in the common language runtime.</summary>

val map: mapping: ('T -> 'U) -> array: 'T array -> 'U array

val isType: testType: Type -> typ: Runtime.StructuralTypes.InferedType -> bool

val testType: Type

val typ: Runtime.StructuralTypes.InferedType

Multiple items
namespace FSharp.Data.Runtime

--------------------
namespace System.Runtime

union case Runtime.StructuralTypes.InferedType.Primitive: typ: Type * unit: Type option * optional: bool * shouldOverrideOnMerge: bool -> Runtime.StructuralTypes.InferedType

val typ: Type

val anonymize: json: JsonValue -> JsonValue

val json: JsonValue

type JsonValue = | String of string | Number of decimal | Float of float | Record of properties: (string * JsonValue) array | Array of elements: JsonValue array | Boolean of bool | Null member Request: url: string * ?httpMethod: string * ?headers: (string * string) seq -> HttpResponse member RequestAsync: url: string * ?httpMethod: string * ?headers: (string * string) seq -> Async<HttpResponse> member ToString: saveOptions: JsonSaveOptions * ?indentationSpaces: int -> string + 2 overloads member WriteTo: w: TextWriter * saveOptions: JsonSaveOptions * ?indentationSpaces: int -> unit static member AsyncLoad: uri: string * ?encoding: Encoding -> Async<JsonValue> static member Load: stream: Stream -> JsonValue + 2 overloads static member Parse: text: string -> JsonValue static member ParseMultiple: text: string -> JsonValue seq static member TryParse: text: string -> JsonValue option
<summary> Represents a JSON value. Large numbers that do not fit in the Decimal type are represented using the Float case, while smaller numbers are represented as decimals to avoid precision loss. </summary>

union case JsonValue.String: string -> JsonValue
<summary> A JSON string value </summary>

val s: string

member Set.Contains: value: 'T -> bool

val typ: (Type option -> Runtime.StructuralTypes.InferedType)

union case Runtime.StructuralInference.InferenceMode'.ValuesOnly: Runtime.StructuralInference.InferenceMode'
<summary> Backward compatible mode. </summary>

Multiple items
type CultureInfo = interface ICloneable interface IFormatProvider new: culture: int -> unit + 3 overloads member ClearCachedData: unit -> unit member Clone: unit -> obj member Equals: value: obj -> bool member GetConsoleFallbackUICulture: unit -> CultureInfo member GetFormat: formatType: Type -> obj member GetHashCode: unit -> int member ToString: unit -> string ...
<summary>Provides information about a specific culture (called a locale for unmanaged code development). The information includes the names for the culture, the writing system, the calendar used, the sort order of strings, and formatting for dates and numbers.</summary>

--------------------
CultureInfo(culture: int) : CultureInfo
CultureInfo(name: string) : CultureInfo
CultureInfo(culture: int, useUserOverride: bool) : CultureInfo
CultureInfo(name: string, useUserOverride: bool) : CultureInfo

property CultureInfo.InvariantCulture: CultureInfo with get
<summary>Gets the <see cref="T:System.Globalization.CultureInfo" /> object that is culture-independent (invariant).</summary>
<returns>The object that is culture-independent (invariant).</returns>

union case Option.None: Option<'T>

val typeof<'T> : Type

Multiple items
type Guid = new: b: byte array -> unit + 6 overloads member CompareTo: value: Guid -> int + 1 overload member Equals: g: Guid -> bool + 1 overload member GetHashCode: unit -> int member ToByteArray: unit -> byte array + 1 overload member ToString: unit -> string + 2 overloads member TryFormat: utf8Destination: Span<byte> * bytesWritten: byref<int> * ?format: ReadOnlySpan<char> -> bool + 1 overload member TryWriteBytes: destination: Span<byte> -> bool + 1 overload static member (<) : left: Guid * right: Guid -> bool static member (<=) : left: Guid * right: Guid -> bool ...
<summary>Represents a globally unique identifier (GUID).</summary>

--------------------
Guid ()
Guid(b: byte array) : Guid
Guid(b: ReadOnlySpan<byte>) : Guid
Guid(g: string) : Guid
Guid(b: ReadOnlySpan<byte>, bigEndian: bool) : Guid
Guid(a: int, b: int16, c: int16, d: byte array) : Guid
Guid(a: int, b: int16, c: int16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : Guid
Guid(a: uint32, b: uint16, c: uint16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : Guid

Guid.NewGuid() : Guid

Multiple items
type DateTime = new: date: DateOnly * time: TimeOnly -> unit + 16 overloads member Add: value: TimeSpan -> DateTime member AddDays: value: float -> DateTime member AddHours: value: float -> DateTime member AddMicroseconds: value: float -> DateTime member AddMilliseconds: value: float -> DateTime member AddMinutes: value: float -> DateTime member AddMonths: months: int -> DateTime member AddSeconds: value: float -> DateTime member AddTicks: value: int64 -> DateTime ...
<summary>Represents an instant in time, typically expressed as a date and time of day.</summary>

--------------------
DateTime ()
   (+0 other overloads)
DateTime(ticks: int64) : DateTime
   (+0 other overloads)
DateTime(date: DateOnly, time: TimeOnly) : DateTime
   (+0 other overloads)
DateTime(ticks: int64, kind: DateTimeKind) : DateTime
   (+0 other overloads)
DateTime(date: DateOnly, time: TimeOnly, kind: DateTimeKind) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, calendar: Calendar) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, kind: DateTimeKind) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, calendar: Calendar) : DateTime
   (+0 other overloads)

val prefix: string

String.StartsWith(value: string) : bool
String.StartsWith(value: char) : bool
String.StartsWith(value: string, comparisonType: StringComparison) : bool
String.StartsWith(value: string, ignoreCase: bool, culture: CultureInfo) : bool

String.Substring(startIndex: int) : string
String.Substring(startIndex: int, length: int) : string

union case JsonValue.Number: decimal -> JsonValue
<summary> A JSON number stored as a decimal (used for numbers that fit in the decimal range) </summary>

val d: decimal

Decimal.ToString() : string
Decimal.ToString( format: string) : string
Decimal.ToString(provider: IFormatProvider) : string
Decimal.ToString( format: string, provider: IFormatProvider) : string

Multiple items
type Decimal = new: value: float -> unit + 8 overloads member CompareTo: value: decimal -> int + 1 overload member Equals: value: decimal -> bool + 2 overloads member GetHashCode: unit -> int member GetTypeCode: unit -> TypeCode member ToString: unit -> string + 3 overloads member TryFormat: utf8Destination: Span<byte> * bytesWritten: byref<int> * ?format: ReadOnlySpan<char> * ?provider: IFormatProvider -> bool + 1 overload static member (%) : d1: decimal * d2: decimal -> decimal static member ( * ) : d1: decimal * d2: decimal -> decimal static member (+) : d1: decimal * d2: decimal -> decimal ...
<summary>Represents a decimal floating-point number.</summary>

--------------------
Decimal ()
Decimal(value: float) : Decimal
Decimal(value: int) : Decimal
Decimal(bits: int array) : Decimal
Decimal(value: int64) : Decimal
Decimal(bits: ReadOnlySpan<int>) : Decimal
Decimal(value: float32) : Decimal
Decimal(value: uint32) : Decimal
Decimal(value: uint64) : Decimal
Decimal(lo: int, mid: int, hi: int, isNegative: bool, scale: byte) : Decimal

Decimal.Parse(s: string) : decimal
Decimal.Parse(s: string, provider: IFormatProvider) : decimal
Decimal.Parse(s: string, style: NumberStyles) : decimal
Decimal.Parse(s: ReadOnlySpan<char>, provider: IFormatProvider) : decimal
Decimal.Parse(utf8Text: ReadOnlySpan<byte>, provider: IFormatProvider) : decimal
Decimal.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : decimal
Decimal.Parse(s: ReadOnlySpan<char>, ?style: NumberStyles, ?provider: IFormatProvider) : decimal
Decimal.Parse(utf8Text: ReadOnlySpan<byte>, ?style: NumberStyles, ?provider: IFormatProvider) : decimal

union case JsonValue.Float: float -> JsonValue
<summary> A JSON number stored as a float (used for large numbers that do not fit in decimal) </summary>

val f: float

Double.ToString() : string
Double.ToString( format: string) : string
Double.ToString(provider: IFormatProvider) : string
Double.ToString( format: string, provider: IFormatProvider) : string

type Double = member CompareTo: value: float -> int + 1 overload member Equals: obj: float -> bool + 1 overload member GetHashCode: unit -> int member GetTypeCode: unit -> TypeCode member ToString: unit -> string + 3 overloads member TryFormat: utf8Destination: Span<byte> * bytesWritten: byref<int> * ?format: ReadOnlySpan<char> * ?provider: IFormatProvider -> bool + 1 overload static member (<) : left: float * right: float -> bool static member (<=) : left: float * right: float -> bool static member (<>) : left: float * right: float -> bool static member (=) : left: float * right: float -> bool ...
<summary>Represents a double-precision floating-point number.</summary>

Double.Parse(s: string) : float
Double.Parse(s: string, provider: IFormatProvider) : float
Double.Parse(s: string, style: NumberStyles) : float
Double.Parse(s: ReadOnlySpan<char>, provider: IFormatProvider) : float
Double.Parse(utf8Text: ReadOnlySpan<byte>, provider: IFormatProvider) : float
Double.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : float
Double.Parse(s: ReadOnlySpan<char>, ?style: NumberStyles, ?provider: IFormatProvider) : float
Double.Parse(utf8Text: ReadOnlySpan<byte>, ?style: NumberStyles, ?provider: IFormatProvider) : float

union case JsonValue.Boolean: bool -> JsonValue
<summary> A JSON boolean value </summary>

union case JsonValue.Null: JsonValue
<summary> A JSON null value </summary>

union case JsonValue.Record: properties: (string * JsonValue) array -> JsonValue
<summary> A JSON object, represented as an array of name-value pairs </summary>

val props: (string * JsonValue) array

val key: string

val value: JsonValue

val newValue: JsonValue

union case JsonValue.Array: elements: JsonValue array -> JsonValue
<summary> A JSON array of values </summary>

Multiple items
val array: JsonValue array

--------------------
type 'T array = 'T array

static member JsonValue.Load: reader: IO.TextReader -> JsonValue
static member JsonValue.Load: stream: IO.Stream -> JsonValue
static member JsonValue.Load: uri: string * ?encoding: Text.Encoding -> JsonValue

val printfn: format: Printf.TextWriterFormat<'T> -> 'T

val anonymizedJson: JsonValue