[isabelle-dev] Merge-Sort Implementation (and a question on induction_schema)

Tue Nov 1 22:51:31 CET 2011

Hi once more,

I ironed out the apply-style snippets and simplified some proofs. Also 
Christian's pointer to induction_schema significantly shortened the 
proof of an induction schema I use (sequences_induct). 
"induction_schema" is really useful! However, another induction schema 
(merge_induct) seems to be "wrong" for induction_schema. Maybe because 
of an additional assumption? Any ideas?

cheers

chris

On 10/30/2011 08:50 PM, Christian Sternagel wrote:
> Hi again,
>
> stability (the third property required by @{thm
> properties_for_sort_key}) did actually cause some difficulties ;)
>
> Hence the attached theory has rough parts in some proofs. But since I
> spent the most part of the weekend on the proof, I decided to post it
> anyway. Finally I can sleep well again ;)
>
> have fun,
>
> chris
>
> On 10/27/2011 03:30 PM, Florian Haftmann wrote:
>>> Indeed, that would be the obvious next step. I have not tried yet but
>>> would not expect too hard difficulties. If this is of general interest I
>>> can try.
>>
>> Well, if you want to superseed the existing quicksort, you have to
>> provide the same generality ;-)
>>
>> Florian
>>
>>
>>
>>
>> _______________________________________________
>> isabelle-dev mailing list
>> isabelle-dev at in.tum.de
>> https://mailmanbroy.informatik.tu-muenchen.de/mailman/listinfo/isabelle-dev
>>
>
>
>
> _______________________________________________
> isabelle-dev mailing list
> isabelle-dev at in.tum.de
> https://mailmanbroy.informatik.tu-muenchen.de/mailman/listinfo/isabelle-dev

-------------- next part --------------
(*
Copyright 2011 Christian Sternagel, René Thiemann

This file is part of IsaFoR/CeTA.

IsaFoR/CeTA is free software: you can redistribute it and/or modify it under the
terms of the GNU Lesser General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

IsaFoR/CeTA is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along
with IsaFoR/CeTA. If not, see <http://www.gnu.org/licenses/>.
*)
theory Efficient_Sort
imports "~~/src/HOL/Library/Multiset"
begin

section {* GHC version of merge sort *}

context linorder
begin

text {*
Split a list into chunks of ascending and descending parts, where
descending parts are reversed. Hence, the result is a list of
sorted lists.
*}
fun sequences :: "('b \<Rightarrow> 'a) \<Rightarrow> 'b list \<Rightarrow> 'b list list"
  and ascending :: "('b \<Rightarrow> 'a) \<Rightarrow> 'b \<Rightarrow> ('b list \<Rightarrow> 'b list) \<Rightarrow> 'b list \<Rightarrow> 'b list list"
  and descending :: "('b \<Rightarrow> 'a) \<Rightarrow> 'b \<Rightarrow> 'b list \<Rightarrow> 'b list \<Rightarrow> 'b list list"
where
  "sequences key (a#b#xs) = (if key a > key b
    then descending key b [a] xs
    else ascending key b (op # a) xs)"
| "sequences key xs = [xs]"

| "ascending key a f (b#bs) = (if \<not> key a > key b
    then ascending key b (f \<circ> op # a) bs
    else f [a] # sequences key (b#bs))"
| "ascending key a f bs = f [a] # sequences key bs"

| "descending key a as (b#bs) = (if key a > key b
    then descending key b (a#as) bs
    else (a#as) # sequences key (b#bs))"
| "descending key a as bs = (a#as) # sequences key bs"

fun merge :: "('b \<Rightarrow> 'a) \<Rightarrow> 'b list \<Rightarrow> 'b list \<Rightarrow> 'b list" where
  "merge key (a#as) (b#bs) = (if key a > key b
    then b # merge key (a#as) bs
    else a # merge key as (b#bs))"
| "merge key [] bs = bs"
| "merge key as [] = as"

fun merge_pairs :: "('b \<Rightarrow> 'a) \<Rightarrow> 'b list list \<Rightarrow> 'b list list" where
  "merge_pairs key (a#b#xs) = merge key a b # merge_pairs key xs"
| "merge_pairs key xs = xs"

lemma length_merge[simp]:
  "length (merge key xs ys) = length xs + length ys"
  by (induct xs ys rule: merge.induct) simp_all

lemma merge_pairs_length[simp]:
  "length (merge_pairs key xs) \<le> length xs"
  by (induct xs rule: merge_pairs.induct) simp_all

fun merge_all :: "('b \<Rightarrow> 'a) \<Rightarrow> 'b list list \<Rightarrow> 'b list" where
  "merge_all key [] = []"
| "merge_all key [x] = x"
| "merge_all key xs = merge_all key (merge_pairs key xs)"

lemma set_merge[simp]:
  "set (merge key xs ys) = set xs \<union> set ys"
  by (induct xs ys rule: merge.induct) auto

lemma sorted_merge[simp]:
  assumes "sorted (map key xs)" and "sorted (map key ys)"
  shows "sorted (map key (merge key xs ys))"
  using assms by (induct xs ys rule: merge.induct) (auto simp: sorted_Cons)

lemma multiset_of_merge[simp]:
  "multiset_of (merge key xs ys) = multiset_of xs + multiset_of ys"
  by (induct xs ys rule: merge.induct) (auto simp: ac_simps)

lemma sorted_merge_pairs[simp]:
  assumes "\<forall>x\<in>set xs. sorted (map key x)"
  shows "\<forall>x\<in>set (merge_pairs key xs). sorted (map key x)"
  using assms by (induct xs rule: merge_pairs.induct) simp_all

lemma multiset_of_merge_pairs[simp]:
  "multiset_of (concat (merge_pairs key xs)) = multiset_of (concat xs)"
  by (induct xs rule: merge_pairs.induct) (auto simp: ac_simps)

lemma sorted_merge_all:
  assumes "\<forall>x\<in>set xs. sorted (map key x)"
  shows "sorted (map key (merge_all key xs))"
  using assms by (induct xs rule: merge_all.induct) simp_all

lemma multiset_of_merge_all[simp]:
  "multiset_of (merge_all key xs) = multiset_of (concat xs)"
  by (induct xs rule: merge_all.induct) (auto simp: ac_simps)

lemma
  shows sorted_sequences: "\<forall>x\<in>set (sequences key xs). sorted (map key x)"
  and "\<lbrakk>\<forall>x\<in>set (f []). key x \<le> key a; sorted (map key (f [])); \<forall>xs ys. f (xs at ys) = f xs @ ys;
    \<forall>x. f [x] = f [] @ [x]\<rbrakk> \<Longrightarrow> \<forall>x\<in>set (ascending key a f xs). sorted (map key x)"
  and "\<lbrakk>\<forall>x\<in>set bs. key a \<le> key x; sorted (map key bs)\<rbrakk>
    \<Longrightarrow> \<forall>x\<in>set (descending key a bs xs). sorted (map key x)"
  by (induct xs and a f xs and a bs xs rule: sequences_ascending_descending.induct)
     (simp_all add: sorted_append sorted_Cons,
      metis append_Cons append_Nil le_less_linear order_trans,
      metis less_le less_trans)

lemma
  shows multiset_of_sequences[simp]:
    "multiset_of (concat (sequences key xs)) = multiset_of xs"
  and "(\<And>x xs. multiset_of (f (x#xs)) = {#x#} + multiset_of (f []) + multiset_of xs)
    \<Longrightarrow> multiset_of (concat (ascending key a f xs)) = {#a#} + multiset_of (f []) + multiset_of xs"
  and "multiset_of (concat (descending key a bs xs)) = {#a#} + multiset_of bs + multiset_of xs"
by (induct xs and a f xs and a bs xs rule: sequences_ascending_descending.induct)
   (simp_all add: ac_simps)

lemma set_concat_merge_pairs[simp]:
  "set (concat (merge_pairs key xs)) = set (concat xs)"
  by (induct xs rule: merge_pairs.induct) (simp_all add: ac_simps)

fun take_desc :: "('b \<Rightarrow> 'a) \<Rightarrow> 'b \<Rightarrow> 'b list \<Rightarrow> 'b list" where
  "take_desc key a [] = [a]"
| "take_desc key a (x#xs) = (if key x < key a
    then a # take_desc key x xs
    else [a])"

fun drop_desc :: "('b \<Rightarrow> 'a) \<Rightarrow> 'b \<Rightarrow> 'b list \<Rightarrow> 'b list" where
  "drop_desc key a [] = []"
| "drop_desc key a (x#xs) = (if key x < key a
    then drop_desc key x xs
    else x#xs)"

lemma take_desc_drop_desc[simp]:
  "take_desc key x xs @ drop_desc key x xs = x#xs"
  by (induct xs arbitrary: x) simp_all

lemma descending_take_desc_drop_desc_conv[simp]:
  "descending key a bs xs
    = (rev (take_desc key a xs) @ bs) # sequences key (drop_desc key a xs)"
proof (induct xs arbitrary: a bs)
  case (Cons x xs) thus ?case by (cases "key a < key x") simp_all
qed simp

fun take_asc :: "('b \<Rightarrow> 'a) \<Rightarrow> 'b \<Rightarrow> 'b list \<Rightarrow> 'b list" where
  "take_asc key a [] = []"
| "take_asc key a (x#xs) = (if \<not> key a > key x
    then x # take_asc key x xs
    else [])"

fun drop_asc :: "('b \<Rightarrow> 'a) \<Rightarrow> 'b \<Rightarrow> 'b list \<Rightarrow> 'b list" where
  "drop_asc key a [] = []"
| "drop_asc key a (x#xs) = (if \<not> key a > key x
    then drop_asc key x xs
    else x#xs)"

lemma take_asc_drop_asc[simp]:
  "take_asc key a xs @ drop_asc key a xs = xs"
  by (induct xs arbitrary: a) simp_all

lemma ascending_take_asc_drop_asc_conv_gen:
  assumes "\<And>xs ys. f (xs at ys) = f xs @ ys"
  shows "ascending key a (f \<circ> op @ as) xs
    = (f as @ a # take_asc key a xs) # sequences key (drop_asc key a xs)"
using assms
proof (induct xs arbitrary: as a)
  case Nil thus ?case by simp
next
  case (Cons x xs)
  show ?case
  proof (cases "key x < key a")
    case True with Cons show ?thesis by simp
  next
    case False
    with Cons(1)[of "x" "as@[a]"] and Cons(2)
      show ?thesis by (simp add: o_def)
  qed
qed

lemma ascending_take_asc_drop_asc_conv[simp]:
  "ascending key b (op # a) xs
    = (a # b # take_asc key b xs) # sequences key (drop_asc key b xs)"
proof -
  let ?f = "op # a"
  have "\<And>xs ys. (op # a) (xs at ys) = (op # a) xs @ ys" by simp
  from ascending_take_asc_drop_asc_conv_gen[of ?f key b "[]" xs, OF this]
    show ?thesis by (simp add: o_def)
qed

lemma sequences_simp[simp]:
  "sequences key (a#b#xs) = (if key b < key a
    then (rev (take_desc key b xs)@[a]) # sequences key (drop_desc key b xs)
    else (a # b # take_asc key b xs) # sequences key (drop_asc key b xs))"
  by simp
declare sequences.simps(1)[simp del]

lemma length_drop_desc[termination_simp]:
  fixes xs::"'b list"
  shows "length (drop_desc key b xs) < Suc (Suc (length xs))"
    (is "?P b xs")
proof (induct xs arbitrary: b rule: length_induct)
  fix xs::"'b list" and b
  assume IH: "\<forall>ys. length ys < length xs \<longrightarrow> (\<forall>x. ?P x ys)"
  show "?P b xs"
  proof (cases xs)
    case Nil thus ?thesis by simp
  next
    case (Cons y ys) with IH[rule_format, of ys y]
      show ?thesis by simp
  qed
qed

lemma length_drop_asc[termination_simp]:
  fixes xs::"'b list"
  shows "length (drop_asc key b xs) < Suc (Suc (length xs))"
    (is "?P b xs")
proof (induct xs arbitrary: b rule: length_induct)
  fix xs::"'b list" and b
  assume IH: "\<forall>ys. length ys < length xs \<longrightarrow> (\<forall>x. ?P x ys)"
  show "?P b xs"
  proof (cases xs)
    case Nil thus ?thesis by simp
  next
    case (Cons y ys) with IH[rule_format, of ys y]
      show ?thesis by simp
  qed
qed

lemma sequences_induct[case_names Nil singleton IH]:
  assumes "\<And>key. P key []" and "\<And>key x. P key [x]"
    and "\<And>key a b xs.
    (key b < key a \<Longrightarrow> P key (drop_desc key b xs))
    \<Longrightarrow> (\<not> key b < key a \<Longrightarrow> P key (drop_asc key b xs))
    \<Longrightarrow> P key (a#b#xs)"
  shows "P key xs"
  using assms by (induction_schema) (pat_completeness, lexicographic_order)

lemma take_desc_less:
  assumes "key x < key y"
  shows "\<forall>z\<in>set (take_desc key x xs). key z < key y"
  using assms
  by (induct xs arbitrary: x y) (auto, force)

lemma take_desc_less':
  assumes "key x < key y"
  shows "\<forall>z\<in>set (rev (take_desc key x xs)). key y \<noteq> key z"
  using take_desc_less[of key x y, OF assms]
  unfolding less_le by force

lemma filter_by_key_drop_desc[simp]:
  assumes "key b \<le> key x"
  shows "[y\<leftarrow>drop_desc key b xs . key x = key y] = [y\<leftarrow>xs . key x = key y]"
  using assms by (induct xs arbitrary: b) auto

lemma filter_by_key_rev_take_desc[simp]:
  "[y\<leftarrow>rev (take_desc key b xs). key b = key y] = [b]"
proof (cases xs)
  case Nil thus ?thesis by simp
next
  case (Cons y ys)
  {
    assume "key y < key b"
    from filter_False[OF take_desc_less'[of key y b ys], OF this]
      have "[z\<leftarrow>rev (take_desc key y ys). key b = key z] = []" .
  }
  thus ?thesis by (simp add: Cons)
qed

lemma filter_by_key_take_desc'[simp]:
  assumes "key b < key a"
  shows "[y\<leftarrow>rev (take_desc key b xs). key a = key y] = []"
  using filter_False[OF take_desc_less'[of key b a, OF assms]] .

lemma length_rev_take_desc:
  "length [y\<leftarrow>rev (take_desc key b xs). key x = key y] \<le> Suc 0"
  by (induct xs arbitrary: b) simp_all

lemma rev_take_desc_fiter_key_conv[simp]: 
  "[y\<leftarrow>rev (take_desc key b xs). key x = key y]
    = [y\<leftarrow>take_desc key b xs. key x = key y]"
    (is "?xs = ?ys")
proof -
  from length_rev_take_desc[of key x b xs]
    have "length ?xs = 0 \<or> length ?xs = Suc 0" by arith
  thus ?thesis
  proof
    assume "length ?xs = 0"
    hence "?xs = []" by simp
    thus ?thesis unfolding rev_filter[symmetric] rev_is_Nil_conv by simp
  next
    assume "length ?xs = Suc 0"
    then obtain a where a: "?xs = [a]" by (cases "?xs") simp_all
    show ?thesis
      using a[unfolded rev_filter[symmetric],
        unfolded rev_singleton_conv, unfolded a[symmetric], symmetric] .
  qed
qed

lemma filter_by_key_take_desc[simp]:
  "[y\<leftarrow>take_desc key b xs. key b = key y] = [b]"
proof -
  from filter_by_key_rev_take_desc[of key b xs]
    show ?thesis unfolding rev_filter[symmetric]
    unfolding rev_singleton_conv by simp
qed

lemmas [simp] = filter_by_key_take_desc'[simplified]

lemma filter_take_asc_drop_asc[simp]:
  "filter P (take_asc key x xs) @ filter P (drop_asc key x xs)
    = filter P xs"
  by (simp add: filter_append[symmetric])

lemma filter_take_desc_drop_desc[simp]:
  "filter P (take_desc key x xs) @ filter P (drop_desc key x xs)
    = filter P (x#xs)"
  by (simp add: filter_append[symmetric])

lemma filter_by_key_sequences[simp]:
  "[y\<leftarrow>concat (sequences key xs). key x = key y]
    = [y\<leftarrow>xs. key x = key y]" (is "?P key xs")
  by (induct key xs rule: sequences_induct) auto

lemma set_merge_all[simp]: "set (merge_all key xs) = set (concat xs)"
  by (induct xs rule: merge_all.induct)
     (simp_all del: set_concat merge_pairs.simps)

lemma merge_Nil2[simp]: "merge key xs [] = xs" by (cases xs) simp_all

lemma merge_simp[simp]:
  assumes "sorted (map key xs)"
  shows "merge key xs (y#ys)
    = takeWhile (\<lambda>x. key x \<le> key y) xs
    @ y#merge key (dropWhile (\<lambda>x. key x \<le> key y) xs) ys"
  using assms by (induct xs arbitrary: y ys) (auto simp: sorted_Cons)

lemma sorted_merge_induct[consumes 1, case_names Nil IH]:
  fixes key::"'b \<Rightarrow> 'a"
  assumes "sorted (map key xs)"
    and "\<And>xs. P key xs []"
    and "\<And>xs y ys. sorted (map key xs) \<Longrightarrow>
    P key (dropWhile (\<lambda>x. key x \<le> key y) xs) ys
    \<Longrightarrow> P key xs (y#ys)"
  shows "P key xs ys"
using assms(1)
proof (induct "xs at ys" arbitrary: xs ys rule: length_induct)
  fix xs ys::"'b list"
  assume IH[rule_format]: "\<forall>zs. length zs < length (xs at ys) \<longrightarrow>
    (\<forall>us vs. zs = us @ vs \<longrightarrow> sorted (map key us) \<longrightarrow> P key us vs)"
    and sorted: "sorted (map key xs)"
  let ?f = "\<lambda>y xs. dropWhile (\<lambda>x. key x \<le> key y) xs"
  show "P key xs ys"
  proof (cases ys)
    case Nil thus ?thesis using assms(2) by simp
  next
    case (Cons v vs)
    show ?thesis unfolding Cons
    proof (rule assms(3)[OF sorted])
      show "P key (?f v xs) vs"
      proof (rule IH)
        show "length (?f v xs @ vs) < length (xs at ys)"
        unfolding Cons using length_dropWhile_le[of "\<lambda>x. key x \<le> key v" xs]
        unfolding length_append list.size by simp
      next
        show "?f v xs @ vs = ?f v xs @ vs" by simp
      next
        show "sorted (map key (?f v xs))"
          using sorted_dropWhile[OF sorted, of "\<lambda>x. x \<le> key v"]
          unfolding dropWhile_map o_def .
      qed
    qed
  qed
qed

lemma filter_by_key_dropWhile[simp]:
  assumes "sorted (map key xs)"
  shows "[y\<leftarrow>dropWhile (\<lambda>x. key x \<le> key z) xs. key z = key y] = []"
    (is "[y\<leftarrow>dropWhile ?P xs. key z = key y] = []")
using assms
proof (induct xs rule: rev_induct)
  case Nil thus ?case by simp
next
  case (snoc x xs)
  hence IH: "[y\<leftarrow>dropWhile ?P xs. key z = key y] = []"
    by (auto simp: sorted_append)
  show ?case
  proof (cases "\<forall>z\<in>set xs. ?P z")
    case True
    show ?thesis
      using dropWhile_append2[of xs ?P "[x]"] and True by simp
  next
    case False
    then obtain a where a: "a \<in> set xs" "\<not> ?P a" by auto
    show ?thesis
      unfolding dropWhile_append1[of a xs ?P, OF a]
      using snoc and False by (auto simp: IH sorted_append)
  qed
qed

lemma filter_by_key_takeWhile[simp]:
  assumes "sorted (map key xs)"
  shows "[y\<leftarrow>takeWhile (\<lambda>x. key x \<le> key z) xs. key z = key y]
    = [y\<leftarrow>xs. key z = key y]"
    (is "[y\<leftarrow>takeWhile ?P xs. key z = key y] = _")
using assms
proof (induct xs rule: rev_induct)
  case Nil thus ?case by simp
next
  case (snoc x xs)
  hence IH: "[y\<leftarrow>takeWhile ?P xs. key z = key y] = [y\<leftarrow>xs. key z = key y]"
    by (auto simp: sorted_append)
  show ?case
  proof (cases "\<forall>z\<in>set xs. ?P z")
    case True
    show ?thesis
      using takeWhile_append2[of xs ?P "[x]"] and True by simp
  next
    case False
    then obtain a where a: "a \<in> set xs" "\<not> ?P a" by auto
    show ?thesis
      unfolding takeWhile_append1[of a xs ?P, OF a]
      using snoc and False by (auto simp: IH sorted_append)
  qed
qed

lemma filter_takeWhile_dropWhile_id[simp]:
  "filter P (takeWhile Q xs) @ filter P (dropWhile Q xs)
    = filter P xs"
  by (simp add: filter_append[symmetric])

lemma filter_by_key_merge_is_append[simp]:
  assumes "sorted (map key xs)"
  shows "[y\<leftarrow>merge key xs ys. key x = key y]
    = [y\<leftarrow>xs. key x = key y] @ [y\<leftarrow>ys. key x = key y]"
    (is "?P xs ys")
  using assms by (induct key xs ys rule: sorted_merge_induct) auto

lemma filter_by_key_merge_pairs[simp]:
  assumes "\<forall>xs\<in>set xss. sorted (map key xs)"
  shows "[y\<leftarrow>concat (merge_pairs key xss). key x = key y]
    = [y\<leftarrow>concat xss. key x = key y]"
  using assms by (induct xss rule: merge_pairs.induct) simp_all

lemma filter_by_key_merge_all[simp]:
  assumes "\<forall>xs\<in>set xss. sorted (map key xs)"
  shows "[y\<leftarrow>merge_all key xss. key x = key y]
    = [y\<leftarrow>concat xss. key x = key y]"
  using assms by (induct xss rule: merge_all.induct) simp_all

lemma filter_by_key_merge_all_sequences[simp]:
  "[x\<leftarrow>merge_all key (sequences key xs) . key y = key x]
    = [x\<leftarrow>xs . key y = key x]"
  using sorted_sequences[of key xs] by simp

lemma sort_key_merge_all_sequences:
  "sort_key key = merge_all key \<circ> sequences key"
  by (intro ext properties_for_sort_key)
     (simp_all add: sorted_merge_all[OF sorted_sequences])

text {*
Replace existing code equations for @{const sort_key} by
@{term "merge_all key \<circ> sequences key"}.
*}
declare sort_key_merge_all_sequences[code]

end

end