Welcome to the Tsonnet series!
If you're not following along, check out how it all started in the first post of the series.
In the previous post, I implemented object inner references:
Tsonnet #29 - Making inner references work
Hercules Lemke Merscher ・ Dec 13 '25
This time, we're tackling equality. Fair warning: this won't be the final implementation, just the first working version. Consider this post a field report from the trenches.
On implementing equality
I've been dabbling with equality in Tsonnet for the past few days. At first sight, equality doesn't seem like an operation hiding too much complexity, but as we uncover the internal details of the language, and apply the design reasoning to it, things start showing up, and you can find yourself in a rabbit hole.
To start with, the AST change is minimal -- just one more variant added to the binary operator:
diff --git a/lib/ast.ml b/lib/ast.ml
index f862f55..ab91d6f 100644
--- a/lib/ast.ml
+++ b/lib/ast.ml
@@ -5,6 +5,7 @@ type bin_op =
| Subtract
| Multiply
| Divide
+ | Equality
[@@deriving qcheck, show]
type unary_op =
@@ -120,6 +183,7 @@ let rec string_of_type = function
| Subtract -> "-"
| Multiply -> "*"
| Divide -> "/"
+ | Equality -> "=="
in prefix ^ " " ^ bin_op
| UnaryOp (_, unary_op, _) ->
let prefix = "Unary Operation" in
The lexer and parser follow suit, as boring as it might look like:
diff --git a/lib/lexer.mll b/lib/lexer.mll
index 9803a55..6f51b6f 100644
--- a/lib/lexer.mll
+++ b/lib/lexer.mll
@@ -45,6 +45,7 @@ rule read =
| "|||" { read_verbatim_string (Buffer.create 16) lexbuf }
| "@\"" { read_single_line_verbatim_double_quoted_string (Buffer.create 16) lexbuf }
| "@'" { read_single_line_verbatim_single_quoted_string (Buffer.create 16) lexbuf }
+ | "==" { EQUALITY }
| '[' { LEFT_SQR_BRACKET }
| ']' { RIGHT_SQR_BRACKET }
| '{' { LEFT_CURLY_BRACKET }
diff --git a/lib/parser.mly b/lib/parser.mly
index 6a5b704..158e4ab 100644
--- a/lib/parser.mly
+++ b/lib/parser.mly
@@ -29,6 +29,7 @@
%token SEMICOLON
%token LOCAL
%token ASSIGN
+%token EQUALITY
%token EOF
%start <Ast.expr> prog
@@ -147,6 +148,7 @@ obj_field_access:
| MINUS { Subtract }
| MULTIPLY { Multiply }
| DIVIDE { Divide }
+ | EQUALITY { Equality }
;
%inline unary_op:
Type checking was also surprisingly simple:
diff --git a/lib/type.ml b/lib/type.ml
index 6334c27..5de3c4c 100644
--- a/lib/type.ml
+++ b/lib/type.ml
@@ -172,13 +172,7 @@ let rec translate venv expr =
(ok (venv, Tunit))
exprs
| BinOp (pos, op, e1, e2) ->
- (let* (venv', e1') = translate venv e1 in
- let* (venv'', e2') = translate venv' e2 in
- match op, e1', e2' with
- | _, Tnumber, Tnumber -> ok (venv'', Tnumber)
- | Add, _, Tstring | Add, Tstring, _ -> ok (venv'', Tstring)
- | _ -> Error.trace Error.Msg.invalid_binary_op pos >>= error
- )
+ translate_bin_op venv pos op e1 e2
| UnaryOp (pos, op, expr) ->
(let* (venv', expr') = translate venv expr in
match op, expr' with
@@ -320,6 +314,18 @@ and translate_object_field_access venv pos scope chain_exprs =
(ok (venv, obj))
chain_exprs
+and translate_bin_op venv pos op e1 e2 =
+ let* (venv', e1') = translate venv e1 in
+ let* (venv'', e2') = translate venv' e2 in
+ match op, e1', e2' with
+ | Add, _, Tstring | Add, Tstring, _ -> ok (venv'', Tstring)
+ | Add, Tnumber, Tnumber -> ok (venv'', Tnumber)
+ | Subtract, Tnumber, Tnumber -> ok (venv'', Tnumber)
+ | Multiply, Tnumber, Tnumber -> ok (venv'', Tnumber)
+ | Divide, Tnumber, Tnumber -> ok (venv'', Tnumber)
+ | Equality, _, _ -> ok (venv'', Tbool)
+ | _ -> Error.trace Error.Msg.invalid_binary_op pos >>= error
+
let check (config : Config.t) expr =
let* _ = Scope.validate expr in
if config.skip_typecheck then
No matter what's on the left or right, the result must be a boolean type.
And then, comes the interpreter.
In my first iteration, it was as simple as that -- the interpret_bin_op function moved below for consistency and aesthetics:
diff --git a/lib/interpreter.ml b/lib/interpreter.ml
index a958477..d19475b 100644
--- a/lib/interpreter.ml
+++ b/lib/interpreter.ml
@@ -2,25 +2,6 @@ open Ast
open Result
open Syntax_sugar
-let interpret_arith_op (op: bin_op) (n1: number) (n2: number) =
- match op, n1, n2 with
- | Add, (Int a), (Int b) -> Int (a + b)
- | Add, (Float a), (Int b) -> Float (a +. (float_of_int b))
- | Add, (Int a), (Float b) -> Float ((float_of_int a) +. b)
- | Add, (Float a), (Float b) -> Float (a +. b)
- | Subtract, (Int a), (Int b) -> Int (a - b)
- | Subtract, (Float a), (Int b) -> Float (a -. (float_of_int b))
- | Subtract, (Int a), (Float b) -> Float ((float_of_int a) -. b)
- | Subtract, (Float a), (Float b) -> Float (a -. b)
- | Multiply, (Int a), (Int b) -> Int (a * b)
- | Multiply, (Float a), (Int b) -> Float (a *. (float_of_int b))
- | Multiply, (Int a), (Float b) -> Float ((float_of_int a) *. b)
- | Multiply, (Float a), (Float b) -> Float (a *. b)
- | Divide, (Int a), (Int b) -> Float ((float_of_int a) /. (float_of_int b))
- | Divide, (Float a), (Int b) -> Float (a /. (float_of_int b))
- | Divide, (Int a), (Float b) -> Float ((float_of_int a) /. b)
- | Divide, (Float a), (Float b) -> Float (a /. b)
-
let interpret_unary_op (op: unary_op) (evaluated_expr: expr) =
match op, evaluated_expr with
| Plus, number -> ok number
@@ -43,18 +24,7 @@ let rec interpret env expr =
Env.find_var varname env
~succ:(fun env' expr -> interpret env' expr)
~err:(Error.error_at pos)
- | BinOp (pos, op, e1, e2) ->
- (let* (env1, e1') = interpret env e1 in
- let* (env2, e2') = interpret env1 e2 in
- match op, e1', e2' with
- | Add, (String _ as v1), (_ as v2) | Add, (_ as v1), (String _ as v2) ->
- let* expr' = interpret_concat_op env2 v1 v2 in
- ok (env, expr')
- | _, Number (pos, v1), Number (_, v2) ->
- ok (env2, Number (pos, interpret_arith_op op v1 v2))
- | _ ->
- Error.trace Error.Msg.invalid_binary_op pos >>= error
- )
+ | BinOp (pos, op, e1, e2) -> interpret_bin_op env (pos, op, e1, e2)
| UnaryOp (pos, op, expr) ->
let* (env', expr') = interpret env expr in
Result.fold (interpret_unary_op op expr')
@@ -74,14 +44,16 @@ let rec interpret env expr =
)
~err:(Error.error_at pos)
-and interpret_concat_op env (e1 : expr) (e2 : expr) : (expr, string) result =
+and interpret_concat_op env e1 e2 =
match e1, e2 with
| String (_, s1), String (_, s2) ->
- ok (String (dummy_pos, s1^s2))
+ ok (env, String (dummy_pos, s1^s2))
| String (_, s1), val2 ->
- let* s2 = Json.expr_to_string ~eval:interpret (env, val2) in ok (String (dummy_pos, s1^s2))
+ let* s2 = Json.expr_to_string ~eval:interpret (env, val2) in
+ ok (env, String (dummy_pos, s1^s2))
| val1, String (_, s2) ->
- let* s1 = Json.expr_to_string ~eval:interpret (env, val1) in ok (String (dummy_pos, s1^s2))
+ let* s1 = Json.expr_to_string ~eval:interpret (env, val1) in
+ ok (env, String (dummy_pos, s1^s2))
| _ ->
error Error.Msg.interp_invalid_concat
@@ -222,4 +194,52 @@ and interpret_object_field_access env (pos, scope, chain_exprs) =
(ok (env', obj))
chain_exprs
+and interpret_bin_op env (pos, op, e1, e2) =
+ let* (env1, e1') = interpret env e1 in
+ let* (env2, e2') = interpret env1 e2 in
+ match op, e1', e2' with
+ | Add, (String _ as v1), (_ as v2) | Add, (_ as v1), (String _ as v2) ->
+ interpret_concat_op env2 v1 v2
+ | _, v1, v2 ->
+ interpret_arith_op env2 (pos, op, v1, v2)
+
+and interpret_arith_op env (pos, bin_op, n1, n2) =
+ match bin_op, n1, n2 with
+ | Add, Number (_, Int a), Number (_, Int b) ->
+ ok (env, Number (pos, Int (a + b)))
+ | Add, Number (_, Float a), Number (_, Int b) ->
+ ok (env, Number (pos, Float (a +. (float_of_int b))))
+ | Add, Number (_, Int a), Number (_, Float b) ->
+ ok (env, Number (pos, Float ((float_of_int a) +. b)))
+ | Add, Number (_, Float a), Number (_, Float b) ->
+ ok (env, Number (pos, Float (a +. b)))
+ | Subtract, Number (_, Int a), Number (_, Int b) ->
+ ok (env, Number (pos, Int (a - b)))
+ | Subtract, Number (_, Float a), Number (_, Int b) ->
+ ok (env, Number (pos, Float (a -. (float_of_int b))))
+ | Subtract, Number (_, Int a), Number (_, Float b) ->
+ ok (env, Number (pos, Float ((float_of_int a) -. b)))
+ | Subtract, Number (_, Float a), Number (_, Float b) ->
+ ok (env, Number (pos, Float (a -. b)))
+ | Multiply, Number (_, Int a), Number (_, Int b) ->
+ ok (env, Number (pos, Int (a * b)))
+ | Multiply, Number (_, Float a), Number (_, Int b) ->
+ ok (env, Number (pos, Float (a *. (float_of_int b))))
+ | Multiply, Number (_, Int a), Number (_, Float b) ->
+ ok (env, Number (pos, Float ((float_of_int a) *. b)))
+ | Multiply, Number (_, Float a), Number (_, Float b) ->
+ ok (env, Number (pos, Float (a *. b)))
+ | Divide, Number (_, Int a), Number (_, Int b) ->
+ ok (env, Number (pos, Float ((float_of_int a) /. (float_of_int b))))
+ | Divide, Number (_, Float a), Number (_, Int b) ->
+ ok (env, Number (pos, Float (a /. (float_of_int b))))
+ | Divide, Number (_, Int a), Number (_, Float b) ->
+ ok (env, Number (pos, Float ((float_of_int a) /. b)))
+ | Divide, Number (_, Float a), Number (_, Float b) ->
+ ok (env, Number (pos, Float (a /. b)))
+ | Equality, v1, v2 ->
+ ok (env, Bool (pos, v1 =~ v2))
+ | _ ->
+ Error.trace Error.Msg.invalid_binary_op pos >>= error
+
let eval expr = interpret Env.empty expr
The semantic equal operator =~ compares two expr and returns a boolean. Pretty simple... for simple values, like numbers, strings, and booleans! When we start comparing objects and arrays, it becomes muddy quite fast.
I had to account for RuntimeObject, that encapsulates its own environment, and Array, that can contain RuntimeObject. If you guessed that laziness (not mine, the language's laziness) could be a source of pain here, you guessed right. These expressions aren't fully evaluated yet. So, I had to fully evaluate each object field in order to be able to compare every single variant, ending with this:
diff --git a/lib/interpreter.ml b/lib/interpreter.ml
index d19475b..de0dd80 100644
--- a/lib/interpreter.ml
+++ b/lib/interpreter.ml
@@ -97,6 +97,25 @@ and interpret_seq env exprs =
interpret env expr >>= fun (env', _) ->
interpret env' (Seq exprs')
+and fully_evaluate_obj_fields obj_env fields =
+ let get_obj_id env =
+ match Env.Map.find_opt "self" env with
+ | Some (ObjectPtr (obj_id, _)) -> Some obj_id
+ | _ -> None
+ in
+ match get_obj_id obj_env with
+ | None -> ok Env.Map.empty
+ | Some obj_id ->
+ ObjectFields.fold (fun field acc ->
+ let* evaluated_fields = acc in
+ let key = Env.uniq_field_ident obj_id field in
+ match Env.Map.find_opt key obj_env with
+ | Some expr ->
+ let* (_, evaluated) = interpret obj_env expr in
+ ok (Env.Map.add field evaluated evaluated_fields)
+ | None -> acc
+ ) fields (ok Env.Map.empty)
+
and interpret_object env (pos, entries) =
let* obj_id = Env.Id.generate () in
let obj_env = Env.add_local "self" (ObjectPtr (obj_id, Self)) env in
@@ -237,6 +256,34 @@ and interpret_arith_op env (pos, bin_op, n1, n2) =
ok (env, Number (pos, Float ((float_of_int a) /. b)))
| Divide, Number (_, Float a), Number (_, Float b) ->
ok (env, Number (pos, Float (a /. b)))
+ | Equality, RuntimeObject (_, env1, fields1), RuntimeObject (_, env2, fields2) ->
+ if not (ObjectFields.equal fields1 fields2) then
+ ok (env, Bool (pos, false))
+ else
+ let* evaluated1 = fully_evaluate_obj_fields env1 fields1 in
+ let* evaluated2 = fully_evaluate_obj_fields env2 fields2 in
+ let* are_equal = ObjectFields.fold (fun field acc ->
+ let* all_equal = acc in
+ if not all_equal then ok false
+ else
+ match (Env.Map.find_opt field evaluated1, Env.Map.find_opt field evaluated2) with
+ | Some v1, Some v2 -> ok (v1 =~ v2)
+ | _, _ -> ok false
+ ) fields1 (ok true) in
+ ok (env, Bool (pos, are_equal))
+ | Equality, Array (_, items1), Array (_, items2) ->
+ if List.length items1 <> List.length items2 then
+ ok (env, Bool (pos, false))
+ else
+ let* are_equal = List.fold_left2 (fun acc item1 item2 ->
+ let* all_equal = acc in
+ if not all_equal then ok false
+ else
+ match interpret_bin_op env (pos, Equality, item1, item2) with
+ | Ok (_, Bool (_, eq)) -> ok eq
+ | _ -> ok false
+ ) (ok true) items1 items2 in
+ ok (env, Bool (pos, are_equal))
| Equality, v1, v2 ->
ok (env, Bool (pos, v1 =~ v2))
| _ ->
I know. Ugly is a compliment, to say the least!
And the magic operator implementation:
(* Semantic equality for expressions.
Standard structural equality (=) includes position information, which means
two expressions that are semantically identical but parsed from different
locations would be considered unequal. This function compares expressions
based solely on their semantic content.
Usage: expr1 =~ expr2 or semantic_equal expr1 expr2 *)
let rec semantic_equal evaluated_expr1 evaluated_expr2 =
match (evaluated_expr1, evaluated_expr2) with
| Null _, Null _ -> true
| Number (_, n1), Number (_, n2) -> n1 = n2
| Bool (_, b1), Bool (_, b2) -> b1 = b2
| String (_, s1), String (_, s2) -> s1 = s2
| Ident (_, id1), Ident (_, id2) -> id1 = id2
| Array (_, items1), Array (_, items2) ->
List.length items1 = List.length items2
&& List.for_all2 semantic_equal items1 items2
| ParsedObject (_, entries1), ParsedObject (_, entries2) ->
List.length entries1 = List.length entries2
&& List.for_all2 object_entry_semantic_equal entries1 entries2
| RuntimeObject (_, env1, fields1), RuntimeObject (_, env2, fields2) ->
runtime_object_semantic_equal (env1, fields1) (env2, fields2)
| ObjectPtr (id1, scope1), ObjectPtr (id2, scope2) ->
id1 = id2 && scope1 = scope2
| _, _ ->
(* different types can't be compared, as well as operations,
just representable values such as number, boolean, string, object, array *)
false
It is just a function: semantic_equal. But there's also an alias: =~.
let ( =~ ) = semantic_equal
Pretty straightforward -- until we get to runtime_object_semantic_equal:
and object_entry_semantic_equal entry1 entry2 =
match (entry1, entry2) with
| ObjectField (name1, e1), ObjectField (name2, e2) ->
name1 = name2 && semantic_equal e1 e2
| ObjectExpr e1, ObjectExpr e2 ->
semantic_equal e1 e2
| _, _ -> false
and runtime_object_semantic_equal (env1, fields1) (env2, fields2) =
(* RuntimeObjects contain lazy (unevaluated) fields.
Full semantic comparison of field values requires
evaluation, which must be done in the interpreter. *)
let get_obj_id env =
match Env.Map.find_opt "self" env with
| Some (ObjectPtr (obj_id, _)) -> Some obj_id
| _ -> None
in
(match (get_obj_id env1, get_obj_id env2) with
| Some obj_id1, Some obj_id2 ->
ObjectFields.equal fields1 fields2
&& ObjectFields.for_all (fun field ->
let key1 = Env.uniq_field_ident obj_id1 field in
let key2 = Env.uniq_field_ident obj_id2 field in
match (Env.Map.find_opt key1 env1, Env.Map.find_opt key2 env2) with
| Some v1, Some v2 -> semantic_equal v1 v2
| _, _ -> false
) fields1
| _, _ -> false
)
There's nothing wrong with it except that we can't guarantee it has been previously evaluated.
This is bugging me, but I have some ideas to try.
For now, this ugly and messy code was able to parse and interpret successfully this sample file:
// samples/equality.jsonnet
{
eq_number_number: 1 == 1,
dif_number_number: 1 == 2,
dif_number_str: "1" == 1,
eq_str_str: "42" == "42",
dif_str_str: "42" == "0",
eq_array_array: [1,2,3] == [1,2,3],
dif_array_array: [1,2,3] == [3,2,1],
eq_obj_obj: {x: 1, y: 2, z: 3} == {z: 3, x: 1, y: 2},
dif_obj_obj: {a: 1, b: 2} == {b: 2, c: 3},
eq_complex_obj_complex_obj: [{}, { x: 3 - 1 }] == [{}, { x: 2 }],
dif_complex_obj_complex_obj: [{ a: 1 }, { b: 2 }] == [{ b: 2 }, { a: 1 }],
}
And the cram test:
diff --git a/test/cram/operations.t b/test/cram/operations.t
index 3e23509..bb1c93a 100644
--- a/test/cram/operations.t
+++ b/test/cram/operations.t
@@ -15,3 +15,18 @@
$ tsonnet ../../samples/bitwise.jsonnet
-2
+
+ $ tsonnet ../../samples/equality.jsonnet
+ {
+ "dif_array_array": false,
+ "dif_complex_obj_complex_obj": false,
+ "dif_number_number": false,
+ "dif_number_str": false,
+ "dif_obj_obj": false,
+ "dif_str_str": false,
+ "eq_array_array": true,
+ "eq_complex_obj_complex_obj": true,
+ "eq_number_number": true,
+ "eq_obj_obj": true,
+ "eq_str_str": true
+ }
Conclusion
Equality works -- all the test cases pass, from simple numbers to nested objects with expressions. But the implementation is messier than I'd like. The core issue is that RuntimeObject and Array can contain unevaluated expressions, which forces us to evaluate on-the-fly during comparison. That's both inefficient and error-prone.
The real problem isn't the equality logic itself, it's that we're mixing concerns. The semantic_equal function in the AST shouldn't need to know about evaluation state -- that's the interpreter's job. We're essentially doing interpretation work inside what should be a pure comparison function.
Phantom types would let us encode evaluation state at compile time: expr becomes 'a expr where 'a can be either Unevaluated or Evaluated. Then semantic_equal would have the signature Evaluated expr -> Evaluated expr -> bool, and the compiler would refuse to compile if we tried to compare unevaluated expressions. Push the evaluation concern back where it belongs. But that's an experiment for later.
For now, though, equality works. The tests pass. I'll clean this up when I tackle the broader evaluation state problem -- which will help the Json module too, since it also suffers from needing to evaluate things at serialization time. I just hope this clean up does not trigger a big refactoring. 🤞
The entire diff can be found here.
Thanks for reading Bit Maybe Wise! Subscribe to watch me discover that "simple" features are never actually simple.
Photo by Markus Winkler on Unsplash
Top comments (0)