- rtshkmr's digital garden/
- Readings/
- Books/
- Real World OCaml: Functional Programming for the Masses/
- Chapter 5: Records/
Chapter 5: Records
Table of Contents
Basics #
- Syntax pointers:
the keynames within records must start with a lowercase letter.
here’s a basic example
open Core

(* One entry of the Unix /etc/services file. *)
type service_info =
  { service_name : string;
    port : int;
    protocol : string;
  };;

#require "re";;

(* Build a service_info from one services line such as "ssh 22/udp ...".
   The three regex groups are the name, the port number and the protocol.
   NOTE: nothing here handles a line that does not match the pattern. *)
let service_info_of_string line =
  let groups =
    Re.exec
      (Re.Posix.compile_pat "([a-zA-Z]+)[ \t]+([0-9]+)/([a-zA-Z]+)")
      line
  in
  { service_name = Re.Group.get groups 1;
    port = Int.of_string (Re.Group.get groups 2);
    protocol = Re.Group.get groups 3;
  }

let ssh = service_info_of_string "ssh 22/udp # SSH Remote Login Protocol";;
accessing fields from a record:
- field value accessing can be done with dot notation
- we may pattern match as well
- how the compiler infers the record types:
- when no ambiguity: the compiler bases its inference on the field names used in constructing the record. That inference is most straightforward when each field name belongs to only one record type.
- fields within the record can have a parameterized type (a type variable such as 'a) \(\implies\) the record is polymorphic
open Core

(* record defined here -- corresponds to /etc/services file on unix *)
type service_info =
  { service_name : string;
    port : int;
    protocol : string;
  };;

#require "re";;

(* Parse one services line ("name port/protocol ...") into a service_info.
   NOTE: nothing here handles a line that does not match the pattern. *)
let service_info_of_string line =
  let matches =
    let pat = "([a-zA-Z]+)[ \t]+([0-9]+)/([a-zA-Z]+)" in
    Re.exec (Re.Posix.compile_pat pat) line
  in
  { service_name = Re.Group.get matches 1;
    port = Int.of_string (Re.Group.get matches 2);
    protocol = Re.Group.get matches 3;
  };;

(* polymorphic record, parameterized over the type of the parsed item: *)
type 'a with_line_num = { item: 'a; line_num: int }

(* Split [file_contents] on newlines and apply [parse] to every line,
   pairing each result with its line number. List.mapi counts from 0,
   hence the [+ 1] to get 1-based line numbers. *)
let parse_lines parse file_contents =
  let lines = String.split ~on:'\n' file_contents in
  List.mapi lines ~f:(fun line_num line ->
    { item = parse line;
      line_num = line_num + 1;
    })
;;

(* Three services entries, one per line. The explicit "\n" escapes keep the
   line breaks that parse_lines splits on; without them the whole string
   would be parsed as a single line. *)
let service_file_content =
  "rtmp 1/ddp # Routing Table Maintenance Protocol\n\
   tcpmux 1/udp # TCP Port Service Multiplexer\n\
   tcpmux 1/tcp # TCP Port Service Multiplexer";;

(* 'a may be the service_info record *)
parse_lines service_info_of_string service_file_content;;

(* 'a may be an integer *)
parse_lines Int.of_string "1\n10\n100\n1000";;
Pattern Matching & Exhaustiveness #
Record patterns are irrefutable, meaning that a record pattern match will never fail at runtime (since the fields are fixed). Therefore, we can pattern-match using a single pattern. In general, patterns for types with a fixed structure, like records and tuples, are irrefutable, unlike types with variable structures like lists and variants.
Also we can do a partial pattern match, though that may cause a drift in usage expectation if the record changes silently – this means that when new fields are added to the record, code that should be updated to react to the presence of those new fields will not be flagged by the compiler.
There are optional warnings (see ocaml -warn-help) with which we can check for some of these sources of error. RULE OF THUMB: treat warnings as errors in dev, but allow them (i.e. don't fail the build on them) for prod/packages meant for distribution.
Use _ to signal fields that we don’t care about.
(* Suppose the record type is this, i.e. it now also carries an optional
[comment] field: *)
type service_info =
{ service_name : string;
port : int;
protocol : string;
comment : string option; (* the record patterns below never bind this field *)
}
(* If warning 9 (missing-record-field-pattern) is turned on, a record
pattern that leaves out a field -- here [comment] -- gets reported. *)
let service_info_to_string
{ service_name = name; port = port; protocol = prot }
=
sprintf "%s %i/%s" name port prot
;;
(* Then we can expect this warning:
Line 2, characters 5-59:
Warning 9 [missing-record-field-pattern]: the following labels are not bound in this record pattern:
comment
Either bind these labels explicitly or add '; _' to the pattern.
val service_info_to_string : service_info -> string = <fun>
*)
(* Better way to handle fields we don't care about: the trailing [; _]
states explicitly that the remaining fields are ignored on purpose. *)
let service_info_to_string
{ service_name = name; port = port; protocol = prot; _ }
=
sprintf "%s %i/%s" name port prot
;;
Field Punning (and label punning) #
Punning gives us expressive syntax while allowing us to be terse.
This style of punning allows us to propagate the same names throughout the codebase so names are consistent, making it easier to navigate the source.
(* PUNNING @ CREATION *)
(* Parses one services line into a service_info, keeping whatever follows
a '#' as the optional comment. *)
let service_info_of_string line =
(* first, split off any comment *)
let (line,comment) =
match String.rsplit2 line ~on:'#' with
| None -> (line,None) (* no '#' found: whole line is the definition *)
| Some (ordinary,comment) -> (ordinary, Some comment)
in
(* now, use a regular expression to break up the
service definition *)
let matches =
Re.exec
(Re.Posix.compile_pat
"([a-zA-Z]+)[ \t]+([0-9]+)/([a-zA-Z]+)")
line
in
(* the local names deliberately equal the field names so they can be punned *)
let service_name = Re.Group.get matches 1 in
let port = Int.of_string (Re.Group.get matches 2) in
let protocol = Re.Group.get matches 3 in
{ service_name; port; protocol; comment };; (* punned: each field takes the same-named variable *)
(* PUNNING @ REFERENCE: Field and label punned version.
Each labeled argument (~service_name, ...) binds a variable of the same
name, which is then punned into the record field of the same name. *)
let create_service_info ~service_name ~port ~protocol ~comment =
{ service_name; port; protocol; comment };;
(* Longer version, without punning: every label and every field spells out
its variable explicitly. Same function as the punned one. *)
let create_service_info
~service_name:service_name ~port:port
~protocol:protocol ~comment:comment =
{ service_name = service_name;
port = port;
protocol = protocol;
comment = comment;
};;
Reusing Field Names #
When reusing field names, the compiler(typechecker) may exhibit implicit behaviour that may be non-obvious at first glance. This is why there are primitives for us to explicitly do type annotations.
Example of non-obvious compilation:
(* Consider the following record types. All three declare [session_id] and
[time]; the remaining field names are unique to one type each. *)
type log_entry =
{ session_id: string;
time: Time_ns.t;
important: bool;
message: string;
}
type heartbeat =
{ session_id: string;
time: Time_ns.t;
status_message: string; (* only heartbeat has this field *)
}
(* logon is declared last, so it is the "most recent" definition of the
shared field names *)
type logon =
{ session_id: string;
time: Time_ns.t;
user: string;
credentials: string;
}
(* B1: for a field name shared by several record types, and with nothing
else to go on, the typechecker picks the most recent definition *)
let get_session_id t = t.session_id;;
(* val get_session_id : logon -> string = <fun> <=== this is from logon (most recent) *)
(* B2: the order in which field names are encountered affects whether the
typechecker runs into an ambiguity *)
let status_and_session t = (t.status_message, t.session_id);;
(* this resolves to heartbeat without ambiguity because it needs to resolve status_message and that field name is unique to heartbeat so our type is resolved without issue.
val status_and_session : heartbeat -> string * string = <fun>
*)
(* Same two fields as status_and_session, but in the opposite order. *)
let session_and_status t = (t.session_id, t.status_message);;
(* This is rejected: session_id is seen first and is NOT unique to one
record type, so (as in B1) t is taken to be the most recent type, logon.
status_message is then looked up in logon, which has no such field.
Line 1, characters 45-59:
Error: This expression has type logon
There is no field status_message within type logon
*)
(* B3: type annotations remove ambiguity -- with t declared as heartbeat,
both field accesses are looked up in heartbeat regardless of order *)
let session_and_status (t:heartbeat) = (t.session_id, t.status_message);;
Idiom: pack types into modules #
Each record type can be given a namespace within which related values can be placed. Conventionally, the record type is just set to t.
The idea here is to qualify a record by the module it comes from.
(* Follows the "pack similar record types into modules" idiom *)
(* The record is referred to from outside as Log_entry.t and its fields as
Log_entry.session_id, Log_entry.time, ... *)
module Log_entry = struct
type t =
{ session_id: string;
time: Time_ns.t;
important: bool;
message: string;
}
end
(* Heartbeat record, namespaced as Heartbeat.t. Its session_id and time
fields no longer clash with the same-named fields of the other modules. *)
module Heartbeat = struct
type t =
{ session_id: string;
time: Time_ns.t;
status_message: string;
}
end
(* Logon record, namespaced as Logon.t. *)
module Logon = struct
type t =
{ session_id: string;
time: Time_ns.t;
user: string;
credentials: string;
}
end
(* Creation function for Log_entry: every field is qualified with the
module name. [time] is set from Time_ns.now (); session_id, important and
message are punned from the function's arguments. *)
let create_log_entry ~session_id ~important message =
{ Log_entry.time = Time_ns.now ();
Log_entry.session_id;
Log_entry.important;
Log_entry.message
};;
It’s possible for us to be more terse. The mental model here extends onto that case of ambiguity that we saw earlier (B2) in that the typechecker/compiler only needs to be shown how to resolve ambiguity once.
(* Reference 1: without the conciseness -- all four fields carry the
Log_entry. qualifier: *)
let create_log_entry ~session_id ~important message =
{ Log_entry.time = Time_ns.now ();
Log_entry.session_id;
Log_entry.important;
Log_entry.message
};;
(* Reference 2: adding a type annotation -- the result is annotated as
Log_entry.t, so none of the fields needs a qualifier: *)
let create_log_entry ~session_id ~important message : Log_entry.t =
{ time = Time_ns.now (); session_id; important; message };;
(* Reference 3: qualification used in a pattern match -- only the first
field is written as Log_entry.important; [message] is left unqualified
and [_] ignores the remaining fields *)
let message_to_string { Log_entry.important; message; _ } =
if important then String.uppercase message else message;;
(* R4: qualification used for dot-access -- t.Log_entry.important reads the
field [important] as declared in module Log_entry *)
let is_important t = t.Log_entry.important;;
In the dot-access form (R4), the dot . is used in 2 different ways. Consider let is_important t = t.Log_entry.important;; :
- first dot: is a record field access, with everything to the right of the dot being interpreted as a field name;
- second dot: accesses the contents of a module, referring to the record field important from within the module Log_entry.
The fact that Log_entry is capitalized and so can’t be a field name is what disambiguates the two uses.
Type-Directed Constructor Disambiguation #
We can avoid qualifying the record field if we allow the compiler to infer the type of record in question.
(* R3: old -- the record type is identified by qualifying a field name *)
let message_to_string { Log_entry.important; message; _ } =
if important then String.uppercase message else message;;
(* R3: new -- the whole pattern is annotated as Log_entry.t, so the field
names inside it need no qualifier *)
let message_to_string ({ important; message; _ } : Log_entry.t) =
if important then String.uppercase message else message;;
(* R4: old -- the field is qualified at the point of access *)
let is_important t = t.Log_entry.important;;
(* R4: new -- the argument is annotated as Log_entry.t, so plain
t.important is enough *)
let is_important (t:Log_entry.t) = t.important;;
The meaning of “constructor” here is special in OCaml:
- a constructor constructs the value of a concrete data type
- variant constructor \(\implies\) builds tagged union types (sum type cases)
e.g. Some, None, Ok, Error…
- record constructor \(\implies\) builds structured records (product types)
e.g. { field1 = v1; field2 = v2 }
- polymorphic variant constructor \(\implies\) builds polymorphic variant cases
e.g. `Foo, `Bar
So the disambiguation here relates to which data-type constructor is being referred to when multiple modules or types define constructors with the same names.
Functional updates #
There’s a functional update syntax that is terse and allows us to build records with specific field changes from another record. Functional updates make your code independent of the identity of the fields in the record that are not changing. It is a good choice for cases where it’s not important that you consider every field of the record when making a change
The with keyword is used here: the field assignments on its RHS are the diffs applied to a copy of the record named on its LHS; the original record itself is left unchanged.
(* Functional update: builds a record equal to t except that
last_heartbeat_time is taken from the heartbeat's time field.
NOTE(review): the record type of t is not defined at this point in the
notes; presumably it is the client_info record (addr, port, user,
credentials, last_heartbeat_time) -- confirm. *)
let register_heartbeat t hb =
{ t with last_heartbeat_time = hb.Heartbeat.time };;
(* Original, without functional update: every unchanged field has to be
copied from t by hand, and only last_heartbeat_time gets a new value. *)
let register_heartbeat t hb =
{ addr = t.addr;
port = t.port;
user = t.user;
credentials = t.credentials;
last_heartbeat_time = hb.Heartbeat.time;
};;
Mutable Fields in Records #
Record fields are immutable unless declared with the mutable keyword; mutable fields support the imperative programming primitives that OCaml offers.
KIV chapter 8 for more on Imperative programming.
(* Mutable record fields: only the two last_heartbeat_* fields are declared
[mutable]; the other four cannot be assigned to after creation. *)
type client_info =
{ addr: Core_unix.Inet_addr.t;
port: int;
user: string;
credentials: string;
mutable last_heartbeat_time: Time_ns.t;
mutable last_heartbeat_status: string;
}
(* Mutation of the mutable fields when we register a heartbeat: [<-]
assigns in place, so t itself is modified rather than copied. The
(hb:Heartbeat.t) annotation lets hb.time and hb.status_message be written
without a module qualifier. *)
let register_heartbeat t (hb:Heartbeat.t) =
t.last_heartbeat_time <- hb.time;
t.last_heartbeat_status <- hb.status_message;;
First-class fields #
Typically, we’d access fields using dot-operators and in that way they don’t behave as first-class objects – we can’t refer to the field itself as a value or pass it around.
Library-provided functionality can make fields first class. Seems like this behaviour is Jane Street-specific (within Base), where there are some patterns to generate accessor functions. It seems like the annotation is a meta annotation that uses fieldslib to generate some accessor functions (specific to the fields) and a Fields submodule to the existing module which gives us a bunch of helper functions to work with record fields.
This seems to be the first introduction of meta-programming directives [@@deriving fields]
Example:
(* Load the ppx rewriters so that the [@@deriving fields] attribute below
is processed. *)
#require "ppx_jane";;
module Logon = struct
type t =
{ session_id: string;
time: Time_ns.t;
user: string;
credentials: string;
}
[@@deriving fields] (* asks for per-field accessors and a Fields submodule to be generated *)
end;;
(* this gives the following module signature (per-field accessor functions plus a generated Fields sub-module):
module Logon :
sig
type t = {
session_id : string;
time : Time_ns.t;
user : string;
credentials : string;
}
val credentials : t -> string
val user : t -> string
val time : t -> Time_ns.t
val session_id : t -> string
module Fields :
sig
val names : string list
val credentials :
([< `Read | `Set_and_create ], t, string) Field.t_with_perm
val user :
([< `Read | `Set_and_create ], t, string) Field.t_with_perm
val time :
([< `Read | `Set_and_create ], t, Time_ns.t) Field.t_with_perm
...
end
end
*)
(* Using this: the generated accessor Logon.user is an ordinary function,
   so it can be handed straight to List.map. The result is the sorted list
   of distinct user names found in [logons]. *)
let get_users logons =
  logons
  |> List.map ~f:Logon.user
  |> List.dedup_and_sort ~compare:String.compare;;
In the code above, we can also see access-control markers for the fields. Inspecting Field.get;; shows - : ('b, 'r, 'a) Field.t_with_perm -> 'r -> 'a = <fun>
(* Render one field of [record] as "<field name>: <value>", where the value
   is read through the first-class [field] and converted with [to_string]. *)
let show_field field to_string record =
  let label = Field.name field in
  let rendered = record |> Field.get field |> to_string in
  String.concat [ label; ": "; rendered ];;
(* Sample Logon.t value; the single [Logon.] prefix after the opening brace
qualifies all four fields. *)
let logon =
{ Logon.
session_id = "26685";
time = Time_ns.of_string_with_utc_offset "2017-07-21 10:11:45Z";
user = "yminsky";
credentials = "Xy2d9W";
};;
(* Show the [user] field; it is already a string, so the identity function
serves as to_string. *)
show_field Logon.Fields.user Fn.id logon;; (*NOTE: we use the =Fn= module here*)
(*********)
(* ====- *)
(*********)
(* Print every field of [logon], one "name: value" line per field.
Logon.Fields.iter takes one labeled callback per field, so each field is
given its own to_string converter here. *)
let print_logon logon =
(* [print to_string] is the per-field callback: it receives the first-class
field and prints show_field's rendering of it for this logon *)
let print to_string field =
printf "%s\n" (show_field field to_string logon)
in
Logon.Fields.iter
~session_id:(print Fn.id)
~time:(print Time_ns.to_string)
~user:(print Fn.id)
~credentials:(print Fn.id);;
print_logon logon;;
The Fn module is a collection of useful primitives that deal with other functions (e.g. the identity function is just Fn.id – see the Fun library here)
For example, there’s an iterator that allows us to iterate all first-class fields:
- nice side effect of this approach is that it helps you adapt your code when the fields of a record change.