2018年4月16日

Elixir4 StringsAndBinaries

String Literals

elixir 有兩種 string: single-quoted, double-quoted

  • strings 是使用 UTF-8 encoding
  • escape characters:

    \a BEL (0x07) \b BS (0x08) \d DEL (0x7f)
    \e ESC (0x1b) \f FF (0x0c) \n NL (0x0a)
    \r CR (0x0d) \s SP (0x20) \t TAB (0x09)
    \v VT (0x0b) \uhhh 1–6 hex digits \xhh 2 hex digits
  • 使用 #{...} 語法,處理 string interpolation

    iex(1)> name="You"
    "You"
    iex(2)> "Hello, #{String.capitalize name}!"
    "Hello, You!"
  • 支援 heredocs """ ... """,會自動去掉每一行文字最前面的 tab/space

    iex(11)> IO.write """
    ...(11)>         my
    ...(11)>         string
    ...(11)>         """
    my
    string
    :ok
    
    iex(14)> IO.write "
    ...(14)>         my
    ...(14)>         string
    ...(14)>         "
    
            my
            string
            :ok

sigil

以下為 sigil types

~C 沒有 escaping or interpolation
~c 以 '' string 的方式 escaped and interpolated
~D 日期格式 yyyy-mm-dd
~N 原始的 DateTime 格式 yyyy-mm-dd hh:mm:ss[.ddd]
~R 沒有 escaping or interpolation 的 regular expression
~r 有 escaped and interpolated 的 regular expression
~S 沒有 escaping or interpolation 的 string
~s 以 "" string 的方式  escaped and interpolated
~T 時間格式 hh:mm:ss[.dddd]
~W whitespace-delimited words 沒有 escaping or interpolation
~w whitespace-delimited words 有 escaping and interpolation
iex(1)> ~C[1\n2#{1+2}]
'1\\n2\#{1+2}'
iex(2)> ~c"1\n2#{1+2}"
'1\n23'
iex(3)> ~S[1\n2#{1+2}]
"1\\n2\#{1+2}"
iex(4)> ~s/1\n2#{1+2}/
"1\n23"
iex(5)> ~W[the c#{'a'}t sat on the mat]
["the", "c\#{'a'}t", "sat", "on", "the", "mat"]
iex(6)> ~w[the c#{'a'}t sat on the mat]
["the", "cat", "sat", "on", "the", "mat"]
iex(7)> ~D<1999-12-31>
~D[1999-12-31]
iex(8)> ~T[12:34:56]
~T[12:34:56]
iex(9)> ~N{1999-12-31 23:59:59}
~N[1999-12-31 23:59:59]

~W ~w 有增加 a, c, s 的選項,分別傳回 atoms, list, string of chars

iex(11)> ~w[the c#{'a'}t sat on the mat]a
[:the, :cat, :sat, :on, :the, :mat]
iex(12)> ~w[the c#{'a'}t sat on the mat]c
['the', 'cat', 'sat', 'on', 'the', 'mat']
iex(13)> ~w[the c#{'a'}t sat on the mat]s
["the", "cat", "sat", "on", "the", "mat"]

也可使用 """

iex(15)> ~w"""
...(15)> test
...(15)> 111
...(15)> """
["test", "111"]
iex(16)> ~r"""
...(16)> hello
...(16)> """i
~r/hello\n/i

single-quoted strings: lists of character codes

single-quoted strings 是 list of integer values,每個值都代表 string 的 codepoint。

'string' 如果裡面儲存的資料是可以列印的,會直接列印該文字,但其實內部是儲存為 int list

iex(1)> str = 'wombat'
'wombat'
iex(2)> is_list str
true
iex(3)> length str
6
iex(4)> Enum.reverse str
'tabmow'
iex(5)> [ 67, 65, 84 ]
'CAT'
iex(6)> :io.format "~w~n", [ str ]
[119,111,109,98,97,116]
:ok
iex(7)> List.to_tuple str
{119, 111, 109, 98, 97, 116}
iex(8)> str ++ [0]
[119, 111, 109, 98, 97, 116, 0]
iex(21)> '∂x/∂y'
[8706, 120, 47, 8706, 121]
iex(22)> 'pole' ++ 'vault'
'polevault'
iex(23)> 'pole' -- 'vault'
'poe'
iex(24)> List.zip [ 'abc', '123' ]
[{97, 49}, {98, 50}, {99, 51}]
iex(25)> [ head | tail ] = 'cat'
'cat'
iex(26)> head
99
iex(27)> tail
'at'
iex(28)> [ head | tail ]
'cat'

defmodule Parse do
  @moduledoc """
  Converts a charlist of decimal digits, optionally prefixed with a sign,
  into an integer.
  """

  @doc """
  Parses `str`, a charlist of decimal digits with an optional leading `+` or
  `-`, and returns the integer it denotes.

  Raises `RuntimeError` when a character other than `0`-`9` follows the
  optional sign.
  """
  def number([?- | digits]), do: -1 * _number_digits(digits, 0)
  def number([?+ | digits]), do: _number_digits(digits, 0)
  def number(str), do: _number_digits(str, 0)

  # Folds the digits left to right, carrying the running total in `total`.
  defp _number_digits([], total), do: total

  defp _number_digits([char | rest], total) when char in ?0..?9 do
    _number_digits(rest, total * 10 + (char - ?0))
  end

  # Anything that is not a decimal digit aborts the parse.
  defp _number_digits([bad | _rest], _total) do
    raise "Invalid digit '#{[bad]}'"
  end
end
iex(1)> Parse.number('123')
123
iex(2)> Parse.number('+123')
123
iex(3)> Parse.number('-123')
-123
iex(4)> Parse.number('+a')
** (RuntimeError) Invalid digit 'a'
    parse.exs:13: Parse._number_digits/2

Binaries

<< term,… >>
iex(1)> b = << 1, 2, 3 >>
<<1, 2, 3>>
iex(2)> byte_size b
3
iex(3)> bit_size b
24

iex(5)> b = << 1::size(2), 1::size(3) >>
<<9::size(5)>>
iex(6)> byte_size b
1
iex(7)> bit_size b
5

可儲存 integers, floats, binaries

iex(13)> int = << 1 >>
<<1>>
iex(14)> float = << 2.5 :: float >>
<<64, 4, 0, 0, 0, 0, 0, 0>>
iex(15)> mix = << int :: binary, float :: binary >>
<<1, 64, 4, 0, 0, 0, 0, 0, 0>>

IEEE 754 float has a sign bit, 11 bits of exponent, and 52 bits of mantissa,可直接用 pattern matching 的方式拆解 float

iex(22)> << sign::size(1), exp::size(11), mantissa::size(52) >> = << 3.14159::float >>
<<64, 9, 33, 249, 240, 27, 134, 110>>
iex(23)> (1 + mantissa / :math.pow(2, 52)) * :math.pow(2, exp-1023)
3.14159

Double-Quoted Strings 就等同於 Binaries,但字串長度並不等於 bytes size,因為 string 是使用 UTF-8 encoding

iex(1)> dqs = "∂x/∂y"
"∂x/∂y"
iex(2)> String.length dqs
5
iex(3)> byte_size dqs
9
iex(4)> String.at(dqs, 0)
"∂"
iex(5)> String.codepoints(dqs)
["∂", "x", "/", "∂", "y"]
iex(6)> String.split(dqs, "/")
["∂x", "∂y"]

使用 string(binary) 的 elixir library

# at(str, offset)
#  在某個位置的 char

iex> String.at("∂og", 0)
"∂"
iex> String.at("∂og", -1)
"g"

# capitalize(str)
#  轉成小寫, 首字元大寫
iex> String.capitalize "école"
"École"
iex> String.capitalize "ÎÎÎÎÎ"
"Îîîîî"


# codepoints(str)
#  字串的 codepoints
iex> String.codepoints("José's ∂øg")
["J", "o", "s", "é", "'", "s", " ", "∂", "ø", "g"]

# downcase(str)
#  轉小寫
iex> String.downcase "ØRSteD"
"ørsted"

# duplicate(str, n)
#  重複 n 次
iex> String.duplicate "Ho! ", 3
"Ho! Ho! Ho! "

# ends_with?(str, suffix | [ suffixes ])
#  是否以某一個 suffixes 結束
iex> String.ends_with? "string", ["elix", "stri", "ring"]
true


# first(str)
#  第一個 char
iex> String.first "∂og"
"∂"


# graphemes(str)
#  與 codepoints 不同
iex> String.codepoints "noe\u0308l"
["n", "o", "e", "¨", "l"]
iex> String.graphemes "noe\u0308l"
["n", "o", "ë", "l"]

# jaro_distance
#  以 0~1 的 float 代表兩個 string 的相似程度 (1 代表完全相同, 0 代表完全不相似)
iex> String.jaro_distance("jonathan", "jonathon")
0.9166666666666666
iex> String.jaro_distance("josé", "john")
0.6666666666666666


# last(str)
#  最後一個字元
iex> String.last "∂og"
"g"

# length(str)
#  字串長度, 用字元計算
Returns the number of graphemes in str.
iex> String.length "∂x/∂y"
5

# myers_difference
#  由一個 string 轉換成另一個的過程
iex> String.myers_difference("banana", "panama")
[del: "b", ins: "p", eq: "ana", del: "n", ins: "m", eq: "a"]


# next_codepoint(str)
#  分割第一個字元跟剩下的字串

defmodule MyString do
  @moduledoc """
  Walks a string one codepoint at a time using `String.next_codepoint/1`.
  """

  @doc """
  Calls `func` with each codepoint of `str`, in order, and returns `[]`.
  """
  def each(str, func) do
    case String.next_codepoint(str) do
      # `nil` signals that the string has been fully consumed.
      nil ->
        []

      {codepoint, remainder} ->
        func.(codepoint)
        each(remainder, func)
    end
  end
end
MyString.each "∂og", fn c -> IO.puts c end
# next_grapheme(str)
#  跟 next_codepoint 一樣,但 return graphemes,以 :no_grapheme 結束


# pad_leading(str, new_length, padding \\ " ")
#  以 str 結束,至少 new_length 這麼長的字串,字串前面的以 padding 補上 (padding 必須是 string)
iex> String.pad_leading("cat", 5, ">")
">>cat"


# pad_trailing(str, new_length, padding \\ " ")
#  同 pad_leading 但 padding 放在字串的後面
iex> String.pad_trailing("cat", 5)
"cat  "


# printable?(str)
#  是否為 printable chars
iex> String.printable? "José"
true
iex> String.printable? "\x{0000} a null"
false


# replace(str, pattern, replacement, options \\ [global: true, insert_replaced: nil])
#  替換字串, :global 代表每一次都替換, :insert_replaced 代表要 insert 在後面 offset 個字元
iex> String.replace "the cat on the mat", "at", "AT"
"the cAT on the mAT"
iex> String.replace "the cat on the mat", "at", "AT", global: false
"the cAT on the mat"
iex> String.replace "the cat on the mat", "at", "AT", insert_replaced: 0
"the catAT on the matAT"
iex> String.replace "the cat on the mat", "at", "AT", insert_replaced: [0,2]
"the catATat on the matATat"


# reverse(str)
iex> String.reverse "pupils"
"slipup"
iex> String.reverse "∑ƒ÷∂"
"∂÷ƒ∑"

# slice(str, offset, len)
#  由 offset 位置開始取 len 個 chars
iex> String.slice "the cat on the mat", 4, 3
"cat"
iex> String.slice "the cat on the mat", -3, 3
"mat"

# split(str, pattern \\ nil, options \\ [global: true])
#  以 pattern 為分割點(預設為 space),切割 string
iex> String.split " the cat on the mat "
["the", "cat", "on", "the", "mat"]
iex> String.split "the cat on the mat", "t"
["", "he ca", " on ", "he ma", ""]
iex> String.split "the cat on the mat", ~r{[ae]}
["th", " c", "t on th", " m", "t"]
iex> String.split "the cat on the mat", ~r{[ae]}, parts: 2
["th", " cat on the mat"]


# starts_with?(str, prefix | [ prefixes ])
#  是否以 prefix 開頭
iex> String.starts_with? "string", ["elix", "stri", "ring"]
true


# trim(str)
#  去掉前後的 whitespaces
iex> String.trim "\t Hello \r\n"
"Hello"

# trim(str, character)
#  去掉前後的 character
iex> String.trim "!!!SALE!!!", "!"
"SALE"

# trim_leading(str)
iex> String.trim_leading "\t\f Hello\t\n"
"Hello\t\n"

# trim_leading(str, character)
iex> String.trim_leading "!!!SALE!!!", "!"
"SALE!!!"

# trim_trailing(str)
iex> String.trim_trailing(" line \r\n")
" line"

# trim_trailing(str, character)
iex> String.trim_trailing "!!!SALE!!!", "!"
"!!!SALE"

# upcase(str)
iex> String.upcase "José Ørstüd"
"JOSÉ ØRSTÜD"

# valid?(str)
#  判斷字串是否只包含合法的 UTF-8 字元 (不是判斷是否為單一 char)
iex> String.valid? "∂"
true
iex> String.valid? "∂og"
true
iex> String.valid? <<0xFF>>
false

Binaries and Pattern Matching

可用在 Binary 的 type: binary, bits, bitstring, bytes, float, integer, utf8, utf16, and utf32

  • size(n): size in bits
  • signed/unsigned
  • endianness: big/little/native
<< length::unsigned-integer-size(12), flags::bitstring-size(4) >> = data

以 Binary 方式處理 String

defmodule Utf8 do
  @moduledoc """
  Iterates over a UTF-8 binary using binary pattern matching.
  """

  @doc """
  Calls `func` with the integer codepoint of every character in `str`, in
  order, and returns `[]`. `str` must be a binary.
  """
  def each(str, func) when is_binary(str), do: walk(str, func)

  # Nothing left to consume.
  defp walk(<<>>, _callback), do: []

  # Peel one UTF-8 encoded codepoint off the front and recurse on the rest.
  defp walk(<<codepoint::utf8, remainder::binary>>, callback) do
    callback.(codepoint)
    walk(remainder, callback)
  end
end

Utf8.each "∂og", fn char -> IO.puts char end

Control Flow

elixir 有提供這些語法,但應該盡量使用 pattern matching 的方式處理

if, unless

iex(1)> if 1 == 1, do: "true part", else: "false part"
"true part"
iex(2)> if 2 == 1, do: "true part", else: "false part"
"false part"
iex(4)> if 1==1 do
...(4)>   "true"
...(4)> else
...(4)>   "false"
...(4)> end
"true"
iex(6)> unless 1 == 1, do: "error", else: "OK"
"OK"
iex(8)> unless 1 == 2, do: "OK", else: "error"
"OK"
iex(10)> unless 1 == 2 do
...(10)>   "OK"
...(10)> else
...(10)>   "error"
...(10)> end
"OK"

cond

FizzBuzz: 如果是 3 的倍數就印 Fizz,如果是 5 的倍數就印 Buzz,如果同時是 3 和 5 的倍數就印 FizzBuzz

defmodule FizzBuzz do
  @moduledoc """
  FizzBuzz built with `cond`, counting up from 1 and accumulating answers.
  """

  @doc """
  Returns the FizzBuzz sequence for `1..n` as a list. `n` must be greater
  than zero.
  """
  def upto(n) when n > 0, do: _upto(1, n, [])

  # `remaining` counts how many numbers still have to be produced. Answers are
  # prepended to `acc`, so the list is reversed once the count reaches zero.
  defp _upto(_number, 0, acc), do: Enum.reverse(acc)

  defp _upto(number, remaining, acc) do
    answer =
      cond do
        # A multiple of both 3 and 5 is a multiple of 15.
        rem(number, 15) == 0 -> "FizzBuzz"
        rem(number, 3) == 0 -> "Fizz"
        rem(number, 5) == 0 -> "Buzz"
        true -> number
      end

    _upto(number + 1, remaining - 1, [answer | acc])
  end
end

因為處理順序的關係,必須要在最後 Enum.reverse result

iex(1)> FizzBuzz.upto(10)
[1, 2, "Fizz", 4, "Buzz", "Fizz", 7, 8, "Fizz", "Buzz"]

所以就反過來,從 n 開始做

defmodule FizzBuzz do
  @moduledoc """
  FizzBuzz built with `cond`, counting down from `n` so that prepending each
  answer already yields the list in ascending order.
  """

  @doc """
  Returns the FizzBuzz sequence for `1..n` as a list. `n` must be greater
  than zero.
  """
  def upto(n) when n > 0, do: _downto(n, [])

  # Reaching zero means every number has been handled; no reversal is needed.
  defp _downto(0, answers), do: answers

  defp _downto(number, answers) do
    by_three? = rem(number, 3) == 0
    by_five? = rem(number, 5) == 0

    answer =
      cond do
        by_three? and by_five? -> "FizzBuzz"
        by_three? -> "Fizz"
        by_five? -> "Buzz"
        true -> number
      end

    _downto(number - 1, [answer | answers])
  end
end

改用 Enum.map 重複呼叫 fizzbuzz

defmodule FizzBuzz do
  @moduledoc """
  FizzBuzz that maps a per-number `cond` over the range `1..n`.
  """

  @doc """
  Returns the FizzBuzz sequence for `1..n` as a list. `n` must be greater
  than zero.
  """
  def upto(n) when n > 0, do: Enum.map(1..n, &fizzbuzz/1)

  # Picks the answer for a single number.
  defp fizzbuzz(number) do
    cond do
      # A multiple of both 3 and 5 is a multiple of 15.
      rem(number, 15) == 0 -> "FizzBuzz"
      rem(number, 3) == 0 -> "Fizz"
      rem(number, 5) == 0 -> "Buzz"
      true -> number
    end
  end
end

最好的方式是用 pattern matching 的方式

defmodule FizzBuzz do
  @moduledoc """
  FizzBuzz expressed as guarded function clauses instead of `cond`.
  """

  @doc """
  Returns the FizzBuzz sequence for `1..n` as a list. `n` must be greater
  than zero.
  """
  def upto(n) when n > 0, do: Enum.map(1..n, &fizzbuzz/1)

  # Clauses are tried top to bottom, so the combined case must come first.
  defp fizzbuzz(number) when rem(number, 15) == 0, do: "FizzBuzz"
  defp fizzbuzz(number) when rem(number, 3) == 0, do: "Fizz"
  defp fizzbuzz(number) when rem(number, 5) == 0, do: "Buzz"
  defp fizzbuzz(number), do: number
end

case

File.open 會遇到兩種 return

# Open the file and branch on the two result shapes File.open/1 returns.
case File.open("case.ex") do
  {:ok, file} ->
    # Print only the first line, then release the handle so it is not leaked.
    IO.puts "First line: #{IO.read(file, :line)}"
    File.close(file)

  {:error, reason} ->
    # The failure reason is interpolated into the message as-is.
    IO.puts "Failed to open file: #{reason}"
end

加上 guard clause

# `defrecord` was removed from Elixir before 1.0; a struct is the replacement.
defmodule Person do
  @moduledoc """
  A person with a `name` (default `""`) and an `age` (default `0`).
  """
  defstruct name: "", age: 0
end

defmodule Bouncer do
  @moduledoc """
  Demonstrates a `case` clause that combines a struct pattern with a guard.
  The check runs in the module body, i.e. when the module is compiled.
  """

  dave = %Person{name: "You", age: 27}

  case dave do
    # The pattern extracts `age`; the guard then restricts which ages match.
    %Person{age: age} = record when is_number(age) and age >= 21 ->
      IO.puts "You are cleared to enter the Foo Bar, #{record.name}"

    _ ->
      IO.puts "Sorry, no admission"
  end
end

Exceptions

iex(1)> raise "Give Up"
** (RuntimeError) Give Up

iex(1)> raise RuntimeError
** (RuntimeError) runtime error

iex(1)> raise RuntimeError, message: "override message"
** (RuntimeError) override message

錯誤時 raise Exception

# Raise when the config file cannot be opened; otherwise hand it to process/1.
case File.open("config_file") do
  {:error, reason} ->
    raise "Failed to open config file: #{reason}"

  {:ok, file} ->
    process(file)
end

或是由 pattern matching 的方式處理

# Assert the success shape directly: if File.open/1 returns anything other
# than {:ok, file}, the match fails and raises a MatchError.
{ :ok, file } = File.open("config_file")
process(file)

References

Programming Elixir