This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch gh-pages
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/gh-pages by this push:
new 66906d5a deploy: d33f3bb77ede1bf481bf71d9ddb45cb4cdcbd858
66906d5a is described below
commit 66906d5a2aba3c47559d5aa915a8e40ad1f99eef
Author: kevinjqliu <[email protected]>
AuthorDate: Mon Oct 20 16:52:19 2025 +0000
deploy: d33f3bb77ede1bf481bf71d9ddb45cb4cdcbd858
---
api/src/iceberg/delete_file_index.rs.html | 562 ++++++++++++++++++++++--------
1 file changed, 418 insertions(+), 144 deletions(-)
diff --git a/api/src/iceberg/delete_file_index.rs.html
b/api/src/iceberg/delete_file_index.rs.html
index cee3776e..6eeb2e32 100644
--- a/api/src/iceberg/delete_file_index.rs.html
+++ b/api/src/iceberg/delete_file_index.rs.html
@@ -42,7 +42,7 @@
<a href=#42 id=42 data-nosnippet>42</a><span class="attr">#[derive(Debug)]
<a href=#43 id=43 data-nosnippet>43</a></span><span class="kw">struct
</span>PopulatedDeleteFileIndex {
<a href=#44 id=44 data-nosnippet>44</a> <span
class="attr">#[allow(dead_code)]
-<a href=#45 id=45 data-nosnippet>45</a> </span>global_deletes:
Vec<Arc<DeleteFileContext>>,
+<a href=#45 id=45 data-nosnippet>45</a> </span>global_equality_deletes:
Vec<Arc<DeleteFileContext>>,
<a href=#46 id=46 data-nosnippet>46</a> eq_deletes_by_partition:
HashMap<Struct, Vec<Arc<DeleteFileContext>>>,
<a href=#47 id=47 data-nosnippet>47</a> pos_deletes_by_partition:
HashMap<Struct, Vec<Arc<DeleteFileContext>>>,
<a href=#48 id=48 data-nosnippet>48</a> <span class="comment">// TODO: do
we need this?
@@ -65,146 +65,420 @@
<a href=#65 id=65 data-nosnippet>65</a> spawn({
<a href=#66 id=66 data-nosnippet>66</a> <span class="kw">let
</span>state = state.clone();
<a href=#67 id=67 data-nosnippet>67</a> <span class="kw">async move
</span>{
-<a href=#68 id=68 data-nosnippet>68</a> <span class="kw">let
</span>delete_files = delete_file_stream.collect::<Vec<<span
class="kw">_</span>>>().<span class="kw">await</span>;
-<a href=#69 id=69 data-nosnippet>69</a>
-<a href=#70 id=70 data-nosnippet>70</a> <span class="kw">let
</span>populated_delete_file_index =
PopulatedDeleteFileIndex::new(delete_files);
-<a href=#71 id=71 data-nosnippet>71</a>
-<a href=#72 id=72 data-nosnippet>72</a> {
-<a href=#73 id=73 data-nosnippet>73</a> <span
class="kw">let </span><span class="kw-2">mut </span>guard =
state.write().unwrap();
-<a href=#74 id=74 data-nosnippet>74</a> <span
class="kw-2">*</span>guard =
DeleteFileIndexState::Populated(populated_delete_file_index);
-<a href=#75 id=75 data-nosnippet>75</a> }
-<a href=#76 id=76 data-nosnippet>76</a> notify.notify_waiters();
-<a href=#77 id=77 data-nosnippet>77</a> }
-<a href=#78 id=78 data-nosnippet>78</a> });
-<a href=#79 id=79 data-nosnippet>79</a>
-<a href=#80 id=80 data-nosnippet>80</a> (DeleteFileIndex { state }, tx)
-<a href=#81 id=81 data-nosnippet>81</a> }
-<a href=#82 id=82 data-nosnippet>82</a>
-<a href=#83 id=83 data-nosnippet>83</a> <span class="doccomment">/// Gets
all the delete files that apply to the specified data file.
-<a href=#84 id=84 data-nosnippet>84</a> </span><span
class="kw">pub</span>(<span class="kw">crate</span>) <span class="kw">async fn
</span>get_deletes_for_data_file(
-<a href=#85 id=85 data-nosnippet>85</a> <span
class="kw-2">&</span><span class="self">self</span>,
-<a href=#86 id=86 data-nosnippet>86</a> data_file: <span
class="kw-2">&</span>DataFile,
-<a href=#87 id=87 data-nosnippet>87</a> seq_num: <span
class="prelude-ty">Option</span><i64>,
-<a href=#88 id=88 data-nosnippet>88</a> ) ->
Vec<FileScanTaskDeleteFile> {
-<a href=#89 id=89 data-nosnippet>89</a> <span class="kw">let
</span>notifier = {
-<a href=#90 id=90 data-nosnippet>90</a> <span class="kw">let
</span>guard = <span class="self">self</span>.state.read().unwrap();
-<a href=#91 id=91 data-nosnippet>91</a> <span class="kw">match
</span><span class="kw-2">*</span>guard {
-<a href=#92 id=92 data-nosnippet>92</a>
DeleteFileIndexState::Populating(<span class="kw-2">ref </span>notifier) =>
notifier.clone(),
-<a href=#93 id=93 data-nosnippet>93</a>
DeleteFileIndexState::Populated(<span class="kw-2">ref </span>index) => {
-<a href=#94 id=94 data-nosnippet>94</a> <span
class="kw">return </span>index.get_deletes_for_data_file(data_file, seq_num);
-<a href=#95 id=95 data-nosnippet>95</a> }
-<a href=#96 id=96 data-nosnippet>96</a> }
-<a href=#97 id=97 data-nosnippet>97</a> };
-<a href=#98 id=98 data-nosnippet>98</a>
-<a href=#99 id=99 data-nosnippet>99</a> notifier.notified().<span
class="kw">await</span>;
-<a href=#100 id=100 data-nosnippet>100</a>
-<a href=#101 id=101 data-nosnippet>101</a> <span class="kw">let
</span>guard = <span class="self">self</span>.state.read().unwrap();
-<a href=#102 id=102 data-nosnippet>102</a> <span class="kw">match
</span>guard.deref() {
-<a href=#103 id=103 data-nosnippet>103</a>
DeleteFileIndexState::Populated(index) => {
-<a href=#104 id=104 data-nosnippet>104</a>
index.get_deletes_for_data_file(data_file, seq_num)
-<a href=#105 id=105 data-nosnippet>105</a> }
-<a href=#106 id=106 data-nosnippet>106</a> <span class="kw">_
</span>=> <span class="macro">unreachable!</span>(<span
class="string">"Cannot be any other state than loaded"</span>),
-<a href=#107 id=107 data-nosnippet>107</a> }
-<a href=#108 id=108 data-nosnippet>108</a> }
-<a href=#109 id=109 data-nosnippet>109</a>}
-<a href=#110 id=110 data-nosnippet>110</a>
-<a href=#111 id=111 data-nosnippet>111</a><span class="kw">impl
</span>PopulatedDeleteFileIndex {
-<a href=#112 id=112 data-nosnippet>112</a> <span class="doccomment">///
Creates a new populated delete file index from a list of delete file contexts,
which
-<a href=#113 id=113 data-nosnippet>113</a> /// allows for fast lookup when
determining which delete files apply to a given data file.
-<a href=#114 id=114 data-nosnippet>114</a> ///
-<a href=#115 id=115 data-nosnippet>115</a> /// 1. The partition information
is extracted from each delete file's manifest entry.
-<a href=#116 id=116 data-nosnippet>116</a> /// 2. If the partition is empty
and the delete file is not a positional delete,
-<a href=#117 id=117 data-nosnippet>117</a> /// it is added to the
`global_deletes` vector
-<a href=#118 id=118 data-nosnippet>118</a> /// 3. Otherwise, the delete
file is added to one of two hash maps based on its content type.
-<a href=#119 id=119 data-nosnippet>119</a> </span><span class="kw">fn
</span>new(files: Vec<DeleteFileContext>) -> PopulatedDeleteFileIndex {
-<a href=#120 id=120 data-nosnippet>120</a> <span class="kw">let
</span><span class="kw-2">mut </span>eq_deletes_by_partition:
HashMap<Struct, Vec<Arc<DeleteFileContext>>> =
-<a href=#121 id=121 data-nosnippet>121</a> HashMap::default();
-<a href=#122 id=122 data-nosnippet>122</a> <span class="kw">let
</span><span class="kw-2">mut </span>pos_deletes_by_partition:
HashMap<Struct, Vec<Arc<DeleteFileContext>>> =
-<a href=#123 id=123 data-nosnippet>123</a> HashMap::default();
-<a href=#124 id=124 data-nosnippet>124</a>
-<a href=#125 id=125 data-nosnippet>125</a> <span class="kw">let
</span><span class="kw-2">mut </span>global_deletes:
Vec<Arc<DeleteFileContext>> = <span class="macro">vec!</span>[];
-<a href=#126 id=126 data-nosnippet>126</a>
-<a href=#127 id=127 data-nosnippet>127</a>
files.into_iter().for_each(|ctx| {
-<a href=#128 id=128 data-nosnippet>128</a> <span class="kw">let
</span>arc_ctx = Arc::new(ctx);
-<a href=#129 id=129 data-nosnippet>129</a>
-<a href=#130 id=130 data-nosnippet>130</a> <span class="kw">let
</span>partition = arc_ctx.manifest_entry.data_file().partition();
-<a href=#131 id=131 data-nosnippet>131</a>
-<a href=#132 id=132 data-nosnippet>132</a> <span class="comment">//
The spec states that "Equality delete files stored with an unpartitioned spec
are applied as global deletes".
-<a href=#133 id=133 data-nosnippet>133</a> </span><span
class="kw">if </span>partition.fields().is_empty() {
-<a href=#134 id=134 data-nosnippet>134</a> <span
class="comment">// TODO: confirm we're good to skip here if we encounter a pos
del
-<a href=#135 id=135 data-nosnippet>135</a> </span><span
class="kw">if </span>arc_ctx.manifest_entry.content_type() !=
DataContentType::PositionDeletes {
-<a href=#136 id=136 data-nosnippet>136</a>
global_deletes.push(arc_ctx);
-<a href=#137 id=137 data-nosnippet>137</a> <span
class="kw">return</span>;
-<a href=#138 id=138 data-nosnippet>138</a> }
-<a href=#139 id=139 data-nosnippet>139</a> }
-<a href=#140 id=140 data-nosnippet>140</a>
-<a href=#141 id=141 data-nosnippet>141</a> <span class="kw">let
</span>destination_map = <span class="kw">match
</span>arc_ctx.manifest_entry.content_type() {
-<a href=#142 id=142 data-nosnippet>142</a>
DataContentType::PositionDeletes => <span class="kw-2">&mut
</span>pos_deletes_by_partition,
-<a href=#143 id=143 data-nosnippet>143</a>
DataContentType::EqualityDeletes => <span class="kw-2">&mut
</span>eq_deletes_by_partition,
-<a href=#144 id=144 data-nosnippet>144</a> <span class="kw">_
</span>=> <span class="macro">unreachable!</span>(),
-<a href=#145 id=145 data-nosnippet>145</a> };
-<a href=#146 id=146 data-nosnippet>146</a>
-<a href=#147 id=147 data-nosnippet>147</a> destination_map
-<a href=#148 id=148 data-nosnippet>148</a>
.entry(partition.clone())
-<a href=#149 id=149 data-nosnippet>149</a> .and_modify(|entry| {
-<a href=#150 id=150 data-nosnippet>150</a>
entry.push(arc_ctx.clone());
-<a href=#151 id=151 data-nosnippet>151</a> })
-<a href=#152 id=152 data-nosnippet>152</a> .or_insert(<span
class="macro">vec!</span>[arc_ctx.clone()]);
-<a href=#153 id=153 data-nosnippet>153</a> });
-<a href=#154 id=154 data-nosnippet>154</a>
-<a href=#155 id=155 data-nosnippet>155</a> PopulatedDeleteFileIndex {
-<a href=#156 id=156 data-nosnippet>156</a> global_deletes,
-<a href=#157 id=157 data-nosnippet>157</a> eq_deletes_by_partition,
-<a href=#158 id=158 data-nosnippet>158</a> pos_deletes_by_partition,
-<a href=#159 id=159 data-nosnippet>159</a> }
-<a href=#160 id=160 data-nosnippet>160</a> }
-<a href=#161 id=161 data-nosnippet>161</a>
-<a href=#162 id=162 data-nosnippet>162</a> <span class="doccomment">///
Determine all the delete files that apply to the provided `DataFile`.
-<a href=#163 id=163 data-nosnippet>163</a> </span><span class="kw">fn
</span>get_deletes_for_data_file(
-<a href=#164 id=164 data-nosnippet>164</a> <span
class="kw-2">&</span><span class="self">self</span>,
-<a href=#165 id=165 data-nosnippet>165</a> data_file: <span
class="kw-2">&</span>DataFile,
-<a href=#166 id=166 data-nosnippet>166</a> seq_num: <span
class="prelude-ty">Option</span><i64>,
-<a href=#167 id=167 data-nosnippet>167</a> ) ->
Vec<FileScanTaskDeleteFile> {
-<a href=#168 id=168 data-nosnippet>168</a> <span class="kw">let
</span><span class="kw-2">mut </span>results = <span
class="macro">vec!</span>[];
-<a href=#169 id=169 data-nosnippet>169</a>
-<a href=#170 id=170 data-nosnippet>170</a> <span
class="self">self</span>.global_deletes
-<a href=#171 id=171 data-nosnippet>171</a> .iter()
-<a href=#172 id=172 data-nosnippet>172</a> <span class="comment">//
filter that returns true if the provided delete file's sequence number is
**greater than or equal to** `seq_num`
-<a href=#173 id=173 data-nosnippet>173</a> </span>.filter(|<span
class="kw-2">&</span>delete| {
-<a href=#174 id=174 data-nosnippet>174</a> seq_num
-<a href=#175 id=175 data-nosnippet>175</a> .map(|seq_num|
delete.manifest_entry.sequence_number() >= <span
class="prelude-val">Some</span>(seq_num))
-<a href=#176 id=176 data-nosnippet>176</a>
.unwrap_or_else(|| <span class="bool-val">true</span>)
-<a href=#177 id=177 data-nosnippet>177</a> })
-<a href=#178 id=178 data-nosnippet>178</a> .for_each(|delete|
results.push(delete.as_ref().into()));
-<a href=#179 id=179 data-nosnippet>179</a>
-<a href=#180 id=180 data-nosnippet>180</a> <span class="kw">if let
</span><span class="prelude-val">Some</span>(deletes) = <span
class="self">self</span>.eq_deletes_by_partition.get(data_file.partition()) {
-<a href=#181 id=181 data-nosnippet>181</a> deletes
-<a href=#182 id=182 data-nosnippet>182</a> .iter()
-<a href=#183 id=183 data-nosnippet>183</a> <span
class="comment">// filter that returns true if the provided delete file's
sequence number is **greater than** `seq_num`
-<a href=#184 id=184 data-nosnippet>184</a>
</span>.filter(|<span class="kw-2">&</span>delete| {
-<a href=#185 id=185 data-nosnippet>185</a> seq_num
-<a href=#186 id=186 data-nosnippet>186</a>
.map(|seq_num| delete.manifest_entry.sequence_number() > <span
class="prelude-val">Some</span>(seq_num))
-<a href=#187 id=187 data-nosnippet>187</a>
.unwrap_or_else(|| <span class="bool-val">true</span>)
-<a href=#188 id=188 data-nosnippet>188</a> })
-<a href=#189 id=189 data-nosnippet>189</a> .for_each(|delete|
results.push(delete.as_ref().into()));
-<a href=#190 id=190 data-nosnippet>190</a> }
-<a href=#191 id=191 data-nosnippet>191</a>
-<a href=#192 id=192 data-nosnippet>192</a> <span class="comment">//
TODO: the spec states that:
-<a href=#193 id=193 data-nosnippet>193</a> // "The data file's
file_path is equal to the delete file's referenced_data_file if it is non-null".
-<a href=#194 id=194 data-nosnippet>194</a> // we're not yet doing
that here. The referenced data file's name will also be present in the
positional
-<a href=#195 id=195 data-nosnippet>195</a> // delete file's file
path column.
-<a href=#196 id=196 data-nosnippet>196</a> </span><span class="kw">if
let </span><span class="prelude-val">Some</span>(deletes) = <span
class="self">self</span>.pos_deletes_by_partition.get(data_file.partition()) {
-<a href=#197 id=197 data-nosnippet>197</a> deletes
-<a href=#198 id=198 data-nosnippet>198</a> .iter()
-<a href=#199 id=199 data-nosnippet>199</a> <span
class="comment">// filter that returns true if the provided delete file's
sequence number is **greater than or equal to** `seq_num`
-<a href=#200 id=200 data-nosnippet>200</a>
</span>.filter(|<span class="kw-2">&</span>delete| {
-<a href=#201 id=201 data-nosnippet>201</a> seq_num
-<a href=#202 id=202 data-nosnippet>202</a>
.map(|seq_num| delete.manifest_entry.sequence_number() >= <span
class="prelude-val">Some</span>(seq_num))
-<a href=#203 id=203 data-nosnippet>203</a>
.unwrap_or_else(|| <span class="bool-val">true</span>)
-<a href=#204 id=204 data-nosnippet>204</a> })
-<a href=#205 id=205 data-nosnippet>205</a> .for_each(|delete|
results.push(delete.as_ref().into()));
-<a href=#206 id=206 data-nosnippet>206</a> }
-<a href=#207 id=207 data-nosnippet>207</a>
-<a href=#208 id=208 data-nosnippet>208</a> results
-<a href=#209 id=209 data-nosnippet>209</a> }
-<a href=#210 id=210
data-nosnippet>210</a>}</code></pre></div></section></main></body></html>
\ No newline at end of file
+<a href=#68 id=68 data-nosnippet>68</a> <span class="kw">let
</span>delete_files: Vec<DeleteFileContext> =
+<a href=#69 id=69 data-nosnippet>69</a>
delete_file_stream.collect::<Vec<<span
class="kw">_</span>>>().<span class="kw">await</span>;
+<a href=#70 id=70 data-nosnippet>70</a>
+<a href=#71 id=71 data-nosnippet>71</a> <span class="kw">let
</span>populated_delete_file_index =
PopulatedDeleteFileIndex::new(delete_files);
+<a href=#72 id=72 data-nosnippet>72</a>
+<a href=#73 id=73 data-nosnippet>73</a> {
+<a href=#74 id=74 data-nosnippet>74</a> <span
class="kw">let </span><span class="kw-2">mut </span>guard =
state.write().unwrap();
+<a href=#75 id=75 data-nosnippet>75</a> <span
class="kw-2">*</span>guard =
DeleteFileIndexState::Populated(populated_delete_file_index);
+<a href=#76 id=76 data-nosnippet>76</a> }
+<a href=#77 id=77 data-nosnippet>77</a> notify.notify_waiters();
+<a href=#78 id=78 data-nosnippet>78</a> }
+<a href=#79 id=79 data-nosnippet>79</a> });
+<a href=#80 id=80 data-nosnippet>80</a>
+<a href=#81 id=81 data-nosnippet>81</a> (DeleteFileIndex { state }, tx)
+<a href=#82 id=82 data-nosnippet>82</a> }
+<a href=#83 id=83 data-nosnippet>83</a>
+<a href=#84 id=84 data-nosnippet>84</a> <span class="doccomment">/// Gets
all the delete files that apply to the specified data file.
+<a href=#85 id=85 data-nosnippet>85</a> </span><span
class="kw">pub</span>(<span class="kw">crate</span>) <span class="kw">async fn
</span>get_deletes_for_data_file(
+<a href=#86 id=86 data-nosnippet>86</a> <span
class="kw-2">&</span><span class="self">self</span>,
+<a href=#87 id=87 data-nosnippet>87</a> data_file: <span
class="kw-2">&</span>DataFile,
+<a href=#88 id=88 data-nosnippet>88</a> seq_num: <span
class="prelude-ty">Option</span><i64>,
+<a href=#89 id=89 data-nosnippet>89</a> ) ->
Vec<FileScanTaskDeleteFile> {
+<a href=#90 id=90 data-nosnippet>90</a> <span class="kw">let
</span>notifier = {
+<a href=#91 id=91 data-nosnippet>91</a> <span class="kw">let
</span>guard = <span class="self">self</span>.state.read().unwrap();
+<a href=#92 id=92 data-nosnippet>92</a> <span class="kw">match
</span><span class="kw-2">*</span>guard {
+<a href=#93 id=93 data-nosnippet>93</a>
DeleteFileIndexState::Populating(<span class="kw-2">ref </span>notifier) =>
notifier.clone(),
+<a href=#94 id=94 data-nosnippet>94</a>
DeleteFileIndexState::Populated(<span class="kw-2">ref </span>index) => {
+<a href=#95 id=95 data-nosnippet>95</a> <span
class="kw">return </span>index.get_deletes_for_data_file(data_file, seq_num);
+<a href=#96 id=96 data-nosnippet>96</a> }
+<a href=#97 id=97 data-nosnippet>97</a> }
+<a href=#98 id=98 data-nosnippet>98</a> };
+<a href=#99 id=99 data-nosnippet>99</a>
+<a href=#100 id=100 data-nosnippet>100</a> notifier.notified().<span
class="kw">await</span>;
+<a href=#101 id=101 data-nosnippet>101</a>
+<a href=#102 id=102 data-nosnippet>102</a> <span class="kw">let
</span>guard = <span class="self">self</span>.state.read().unwrap();
+<a href=#103 id=103 data-nosnippet>103</a> <span class="kw">match
</span>guard.deref() {
+<a href=#104 id=104 data-nosnippet>104</a>
DeleteFileIndexState::Populated(index) => {
+<a href=#105 id=105 data-nosnippet>105</a>
index.get_deletes_for_data_file(data_file, seq_num)
+<a href=#106 id=106 data-nosnippet>106</a> }
+<a href=#107 id=107 data-nosnippet>107</a> <span class="kw">_
</span>=> <span class="macro">unreachable!</span>(<span
class="string">"Cannot be any other state than loaded"</span>),
+<a href=#108 id=108 data-nosnippet>108</a> }
+<a href=#109 id=109 data-nosnippet>109</a> }
+<a href=#110 id=110 data-nosnippet>110</a>}
+<a href=#111 id=111 data-nosnippet>111</a>
+<a href=#112 id=112 data-nosnippet>112</a><span class="kw">impl
</span>PopulatedDeleteFileIndex {
+<a href=#113 id=113 data-nosnippet>113</a> <span class="doccomment">///
Creates a new populated delete file index from a list of delete file contexts,
which
+<a href=#114 id=114 data-nosnippet>114</a> /// allows for fast lookup when
determining which delete files apply to a given data file.
+<a href=#115 id=115 data-nosnippet>115</a> ///
+<a href=#116 id=116 data-nosnippet>116</a> /// 1. The partition information
is extracted from each delete file's manifest entry.
+<a href=#117 id=117 data-nosnippet>117</a> /// 2. If the partition is empty
and the delete file is not a positional delete,
+<a href=#118 id=118 data-nosnippet>118</a> /// it is added to the
`global_equality_deletes` vector
+<a href=#119 id=119 data-nosnippet>119</a> /// 3. Otherwise, the delete
file is added to one of two hash maps based on its content type.
+<a href=#120 id=120 data-nosnippet>120</a> </span><span class="kw">fn
</span>new(files: Vec<DeleteFileContext>) -> PopulatedDeleteFileIndex {
+<a href=#121 id=121 data-nosnippet>121</a> <span class="kw">let
</span><span class="kw-2">mut </span>eq_deletes_by_partition:
HashMap<Struct, Vec<Arc<DeleteFileContext>>> =
+<a href=#122 id=122 data-nosnippet>122</a> HashMap::default();
+<a href=#123 id=123 data-nosnippet>123</a> <span class="kw">let
</span><span class="kw-2">mut </span>pos_deletes_by_partition:
HashMap<Struct, Vec<Arc<DeleteFileContext>>> =
+<a href=#124 id=124 data-nosnippet>124</a> HashMap::default();
+<a href=#125 id=125 data-nosnippet>125</a>
+<a href=#126 id=126 data-nosnippet>126</a> <span class="kw">let
</span><span class="kw-2">mut </span>global_equality_deletes:
Vec<Arc<DeleteFileContext>> = <span class="macro">vec!</span>[];
+<a href=#127 id=127 data-nosnippet>127</a>
+<a href=#128 id=128 data-nosnippet>128</a>
files.into_iter().for_each(|ctx| {
+<a href=#129 id=129 data-nosnippet>129</a> <span class="kw">let
</span>arc_ctx = Arc::new(ctx);
+<a href=#130 id=130 data-nosnippet>130</a>
+<a href=#131 id=131 data-nosnippet>131</a> <span class="kw">let
</span>partition = arc_ctx.manifest_entry.data_file().partition();
+<a href=#132 id=132 data-nosnippet>132</a>
+<a href=#133 id=133 data-nosnippet>133</a> <span class="comment">//
The spec states that "Equality delete files stored with an unpartitioned spec
are applied as global deletes".
+<a href=#134 id=134 data-nosnippet>134</a> </span><span
class="kw">if </span>partition.fields().is_empty() {
+<a href=#135 id=135 data-nosnippet>135</a> <span
class="comment">// TODO: confirm we're good to skip here if we encounter a pos
del
+<a href=#136 id=136 data-nosnippet>136</a> </span><span
class="kw">if </span>arc_ctx.manifest_entry.content_type() !=
DataContentType::PositionDeletes {
+<a href=#137 id=137 data-nosnippet>137</a>
global_equality_deletes.push(arc_ctx);
+<a href=#138 id=138 data-nosnippet>138</a> <span
class="kw">return</span>;
+<a href=#139 id=139 data-nosnippet>139</a> }
+<a href=#140 id=140 data-nosnippet>140</a> }
+<a href=#141 id=141 data-nosnippet>141</a>
+<a href=#142 id=142 data-nosnippet>142</a> <span class="kw">let
</span>destination_map = <span class="kw">match
</span>arc_ctx.manifest_entry.content_type() {
+<a href=#143 id=143 data-nosnippet>143</a>
DataContentType::PositionDeletes => <span class="kw-2">&mut
</span>pos_deletes_by_partition,
+<a href=#144 id=144 data-nosnippet>144</a>
DataContentType::EqualityDeletes => <span class="kw-2">&mut
</span>eq_deletes_by_partition,
+<a href=#145 id=145 data-nosnippet>145</a> <span class="kw">_
</span>=> <span class="macro">unreachable!</span>(),
+<a href=#146 id=146 data-nosnippet>146</a> };
+<a href=#147 id=147 data-nosnippet>147</a>
+<a href=#148 id=148 data-nosnippet>148</a> destination_map
+<a href=#149 id=149 data-nosnippet>149</a>
.entry(partition.clone())
+<a href=#150 id=150 data-nosnippet>150</a> .and_modify(|entry| {
+<a href=#151 id=151 data-nosnippet>151</a>
entry.push(arc_ctx.clone());
+<a href=#152 id=152 data-nosnippet>152</a> })
+<a href=#153 id=153 data-nosnippet>153</a> .or_insert(<span
class="macro">vec!</span>[arc_ctx.clone()]);
+<a href=#154 id=154 data-nosnippet>154</a> });
+<a href=#155 id=155 data-nosnippet>155</a>
+<a href=#156 id=156 data-nosnippet>156</a> PopulatedDeleteFileIndex {
+<a href=#157 id=157 data-nosnippet>157</a> global_equality_deletes,
+<a href=#158 id=158 data-nosnippet>158</a> eq_deletes_by_partition,
+<a href=#159 id=159 data-nosnippet>159</a> pos_deletes_by_partition,
+<a href=#160 id=160 data-nosnippet>160</a> }
+<a href=#161 id=161 data-nosnippet>161</a> }
+<a href=#162 id=162 data-nosnippet>162</a>
+<a href=#163 id=163 data-nosnippet>163</a> <span class="doccomment">///
Determine all the delete files that apply to the provided `DataFile`.
+<a href=#164 id=164 data-nosnippet>164</a> </span><span class="kw">fn
</span>get_deletes_for_data_file(
+<a href=#165 id=165 data-nosnippet>165</a> <span
class="kw-2">&</span><span class="self">self</span>,
+<a href=#166 id=166 data-nosnippet>166</a> data_file: <span
class="kw-2">&</span>DataFile,
+<a href=#167 id=167 data-nosnippet>167</a> seq_num: <span
class="prelude-ty">Option</span><i64>,
+<a href=#168 id=168 data-nosnippet>168</a> ) ->
Vec<FileScanTaskDeleteFile> {
+<a href=#169 id=169 data-nosnippet>169</a> <span class="kw">let
</span><span class="kw-2">mut </span>results = <span
class="macro">vec!</span>[];
+<a href=#170 id=170 data-nosnippet>170</a>
+<a href=#171 id=171 data-nosnippet>171</a> <span
class="self">self</span>.global_equality_deletes
+<a href=#172 id=172 data-nosnippet>172</a> .iter()
+<a href=#173 id=173 data-nosnippet>173</a> <span class="comment">//
filter that returns true if the provided delete file's sequence number is
**greater than** `seq_num`
+<a href=#174 id=174 data-nosnippet>174</a> </span>.filter(|<span
class="kw-2">&</span>delete| {
+<a href=#175 id=175 data-nosnippet>175</a> seq_num
+<a href=#176 id=176 data-nosnippet>176</a> .map(|seq_num|
delete.manifest_entry.sequence_number() > <span
class="prelude-val">Some</span>(seq_num))
+<a href=#177 id=177 data-nosnippet>177</a>
.unwrap_or_else(|| <span class="bool-val">true</span>)
+<a href=#178 id=178 data-nosnippet>178</a> })
+<a href=#179 id=179 data-nosnippet>179</a> .for_each(|delete|
results.push(delete.as_ref().into()));
+<a href=#180 id=180 data-nosnippet>180</a>
+<a href=#181 id=181 data-nosnippet>181</a> <span class="kw">if let
</span><span class="prelude-val">Some</span>(deletes) = <span
class="self">self</span>.eq_deletes_by_partition.get(data_file.partition()) {
+<a href=#182 id=182 data-nosnippet>182</a> deletes
+<a href=#183 id=183 data-nosnippet>183</a> .iter()
+<a href=#184 id=184 data-nosnippet>184</a> <span
class="comment">// filter that returns true if the provided delete file's
sequence number is **greater than** `seq_num`
+<a href=#185 id=185 data-nosnippet>185</a>
</span>.filter(|<span class="kw-2">&</span>delete| {
+<a href=#186 id=186 data-nosnippet>186</a> seq_num
+<a href=#187 id=187 data-nosnippet>187</a>
.map(|seq_num| delete.manifest_entry.sequence_number() > <span
class="prelude-val">Some</span>(seq_num))
+<a href=#188 id=188 data-nosnippet>188</a>
.unwrap_or_else(|| <span class="bool-val">true</span>)
+<a href=#189 id=189 data-nosnippet>189</a> &&
data_file.partition_spec_id == delete.partition_spec_id
+<a href=#190 id=190 data-nosnippet>190</a> })
+<a href=#191 id=191 data-nosnippet>191</a> .for_each(|delete|
results.push(delete.as_ref().into()));
+<a href=#192 id=192 data-nosnippet>192</a> }
+<a href=#193 id=193 data-nosnippet>193</a>
+<a href=#194 id=194 data-nosnippet>194</a> <span class="comment">//
TODO: the spec states that:
+<a href=#195 id=195 data-nosnippet>195</a> // "The data file's
file_path is equal to the delete file's referenced_data_file if it is non-null".
+<a href=#196 id=196 data-nosnippet>196</a> // we're not yet doing
that here. The referenced data file's name will also be present in the
positional
+<a href=#197 id=197 data-nosnippet>197</a> // delete file's file
path column.
+<a href=#198 id=198 data-nosnippet>198</a> </span><span class="kw">if
let </span><span class="prelude-val">Some</span>(deletes) = <span
class="self">self</span>.pos_deletes_by_partition.get(data_file.partition()) {
+<a href=#199 id=199 data-nosnippet>199</a> deletes
+<a href=#200 id=200 data-nosnippet>200</a> .iter()
+<a href=#201 id=201 data-nosnippet>201</a> <span
class="comment">// filter that returns true if the provided delete file's
sequence number is **greater than or equal to** `seq_num`
+<a href=#202 id=202 data-nosnippet>202</a>
</span>.filter(|<span class="kw-2">&</span>delete| {
+<a href=#203 id=203 data-nosnippet>203</a> seq_num
+<a href=#204 id=204 data-nosnippet>204</a>
.map(|seq_num| delete.manifest_entry.sequence_number() >= <span
class="prelude-val">Some</span>(seq_num))
+<a href=#205 id=205 data-nosnippet>205</a>
.unwrap_or_else(|| <span class="bool-val">true</span>)
+<a href=#206 id=206 data-nosnippet>206</a> &&
data_file.partition_spec_id == delete.partition_spec_id
+<a href=#207 id=207 data-nosnippet>207</a> })
+<a href=#208 id=208 data-nosnippet>208</a> .for_each(|delete|
results.push(delete.as_ref().into()));
+<a href=#209 id=209 data-nosnippet>209</a> }
+<a href=#210 id=210 data-nosnippet>210</a>
+<a href=#211 id=211 data-nosnippet>211</a> results
+<a href=#212 id=212 data-nosnippet>212</a> }
+<a href=#213 id=213 data-nosnippet>213</a>}
+<a href=#214 id=214 data-nosnippet>214</a>
+<a href=#215 id=215 data-nosnippet>215</a><span class="attr">#[cfg(test)]
+<a href=#216 id=216 data-nosnippet>216</a></span><span class="kw">mod
</span>tests {
+<a href=#217 id=217 data-nosnippet>217</a> <span class="kw">use
</span>uuid::Uuid;
+<a href=#218 id=218 data-nosnippet>218</a>
+<a href=#219 id=219 data-nosnippet>219</a> <span class="kw">use
super</span>::<span class="kw-2">*</span>;
+<a href=#220 id=220 data-nosnippet>220</a> <span class="kw">use
</span><span class="kw">crate</span>::spec::{
+<a href=#221 id=221 data-nosnippet>221</a> DataContentType,
DataFileBuilder, DataFileFormat, Literal, ManifestEntry, ManifestStatus,
+<a href=#222 id=222 data-nosnippet>222</a> Struct,
+<a href=#223 id=223 data-nosnippet>223</a> };
+<a href=#224 id=224 data-nosnippet>224</a>
+<a href=#225 id=225 data-nosnippet>225</a> <span class="attr">#[test]
+<a href=#226 id=226 data-nosnippet>226</a> </span><span class="kw">fn
</span>test_delete_file_index_unpartitioned() {
+<a href=#227 id=227 data-nosnippet>227</a> <span class="kw">let
</span>deletes: Vec<ManifestEntry> = <span class="macro">vec!</span>[
+<a href=#228 id=228 data-nosnippet>228</a>
build_added_manifest_entry(<span class="number">4</span>, <span
class="kw-2">&</span>build_unpartitioned_eq_delete()),
+<a href=#229 id=229 data-nosnippet>229</a>
build_added_manifest_entry(<span class="number">6</span>, <span
class="kw-2">&</span>build_unpartitioned_eq_delete()),
+<a href=#230 id=230 data-nosnippet>230</a>
build_added_manifest_entry(<span class="number">5</span>, <span
class="kw-2">&</span>build_unpartitioned_pos_delete()),
+<a href=#231 id=231 data-nosnippet>231</a>
build_added_manifest_entry(<span class="number">6</span>, <span
class="kw-2">&</span>build_unpartitioned_pos_delete()),
+<a href=#232 id=232 data-nosnippet>232</a> ];
+<a href=#233 id=233 data-nosnippet>233</a>
+<a href=#234 id=234 data-nosnippet>234</a> <span class="kw">let
</span>delete_file_paths: Vec<String> = deletes
+<a href=#235 id=235 data-nosnippet>235</a> .iter()
+<a href=#236 id=236 data-nosnippet>236</a> .map(|file|
file.file_path().to_string())
+<a href=#237 id=237 data-nosnippet>237</a> .collect();
+<a href=#238 id=238 data-nosnippet>238</a>
+<a href=#239 id=239 data-nosnippet>239</a> <span class="kw">let
</span>delete_contexts: Vec<DeleteFileContext> = deletes
+<a href=#240 id=240 data-nosnippet>240</a> .into_iter()
+<a href=#241 id=241 data-nosnippet>241</a> .map(|entry|
DeleteFileContext {
+<a href=#242 id=242 data-nosnippet>242</a> manifest_entry:
entry.into(),
+<a href=#243 id=243 data-nosnippet>243</a> partition_spec_id:
<span class="number">0</span>,
+<a href=#244 id=244 data-nosnippet>244</a> })
+<a href=#245 id=245 data-nosnippet>245</a> .collect();
+<a href=#246 id=246 data-nosnippet>246</a>
+<a href=#247 id=247 data-nosnippet>247</a> <span class="kw">let
</span>delete_file_index = PopulatedDeleteFileIndex::new(delete_contexts);
+<a href=#248 id=248 data-nosnippet>248</a>
+<a href=#249 id=249 data-nosnippet>249</a> <span class="kw">let
</span>data_file = build_unpartitioned_data_file();
+<a href=#250 id=250 data-nosnippet>250</a>
+<a href=#251 id=251 data-nosnippet>251</a> <span class="comment">// All
deletes apply to sequence 0
+<a href=#252 id=252 data-nosnippet>252</a> </span><span class="kw">let
</span>delete_files_to_apply_for_seq_0 =
+<a href=#253 id=253 data-nosnippet>253</a>
delete_file_index.get_deletes_for_data_file(<span
class="kw-2">&</span>data_file, <span class="prelude-val">Some</span>(<span
class="number">0</span>));
+<a href=#254 id=254 data-nosnippet>254</a> <span
class="macro">assert_eq!</span>(delete_files_to_apply_for_seq_0.len(), <span
class="number">4</span>);
+<a href=#255 id=255 data-nosnippet>255</a>
+<a href=#256 id=256 data-nosnippet>256</a> <span class="comment">// All
deletes apply to sequence 3
+<a href=#257 id=257 data-nosnippet>257</a> </span><span class="kw">let
</span>delete_files_to_apply_for_seq_3 =
+<a href=#258 id=258 data-nosnippet>258</a>
delete_file_index.get_deletes_for_data_file(<span
class="kw-2">&</span>data_file, <span class="prelude-val">Some</span>(<span
class="number">3</span>));
+<a href=#259 id=259 data-nosnippet>259</a> <span
class="macro">assert_eq!</span>(delete_files_to_apply_for_seq_3.len(), <span
class="number">4</span>);
+<a href=#260 id=260 data-nosnippet>260</a>
+<a href=#261 id=261 data-nosnippet>261</a> <span class="comment">//
Last 3 deletes apply to sequence 4
+<a href=#262 id=262 data-nosnippet>262</a> </span><span class="kw">let
</span>delete_files_to_apply_for_seq_4 =
+<a href=#263 id=263 data-nosnippet>263</a>
delete_file_index.get_deletes_for_data_file(<span
class="kw-2">&</span>data_file, <span class="prelude-val">Some</span>(<span
class="number">4</span>));
+<a href=#264 id=264 data-nosnippet>264</a> <span class="kw">let
</span>actual_paths_to_apply_for_seq_4: Vec<String> =
delete_files_to_apply_for_seq_4
+<a href=#265 id=265 data-nosnippet>265</a> .into_iter()
+<a href=#266 id=266 data-nosnippet>266</a> .map(|file|
file.file_path)
+<a href=#267 id=267 data-nosnippet>267</a> .collect();
+<a href=#268 id=268 data-nosnippet>268</a>
+<a href=#269 id=269 data-nosnippet>269</a> <span
class="macro">assert_eq!</span>(
+<a href=#270 id=270 data-nosnippet>270</a>
actual_paths_to_apply_for_seq_4,
+<a href=#271 id=271 data-nosnippet>271</a>
delete_file_paths[delete_file_paths.len() - <span class="number">3</span>..]
+<a href=#272 id=272 data-nosnippet>272</a> );
+<a href=#273 id=273 data-nosnippet>273</a>
+<a href=#274 id=274 data-nosnippet>274</a> <span class="comment">//
Last 3 deletes apply to sequence 5
+<a href=#275 id=275 data-nosnippet>275</a> </span><span class="kw">let
</span>delete_files_to_apply_for_seq_5 =
+<a href=#276 id=276 data-nosnippet>276</a>
delete_file_index.get_deletes_for_data_file(<span
class="kw-2">&</span>data_file, <span class="prelude-val">Some</span>(<span
class="number">5</span>));
+<a href=#277 id=277 data-nosnippet>277</a> <span class="kw">let
</span>actual_paths_to_apply_for_seq_5: Vec<String> =
delete_files_to_apply_for_seq_5
+<a href=#278 id=278 data-nosnippet>278</a> .into_iter()
+<a href=#279 id=279 data-nosnippet>279</a> .map(|file|
file.file_path)
+<a href=#280 id=280 data-nosnippet>280</a> .collect();
+<a href=#281 id=281 data-nosnippet>281</a> <span
class="macro">assert_eq!</span>(
+<a href=#282 id=282 data-nosnippet>282</a>
actual_paths_to_apply_for_seq_5,
+<a href=#283 id=283 data-nosnippet>283</a>
delete_file_paths[delete_file_paths.len() - <span class="number">3</span>..]
+<a href=#284 id=284 data-nosnippet>284</a> );
+<a href=#285 id=285 data-nosnippet>285</a>
+<a href=#286 id=286 data-nosnippet>286</a> <span class="comment">//
Only the last position delete applies to sequence 6
+<a href=#287 id=287 data-nosnippet>287</a> </span><span class="kw">let
</span>delete_files_to_apply_for_seq_6 =
+<a href=#288 id=288 data-nosnippet>288</a>
delete_file_index.get_deletes_for_data_file(<span
class="kw-2">&</span>data_file, <span class="prelude-val">Some</span>(<span
class="number">6</span>));
+<a href=#289 id=289 data-nosnippet>289</a> <span class="kw">let
</span>actual_paths_to_apply_for_seq_6: Vec<String> =
delete_files_to_apply_for_seq_6
+<a href=#290 id=290 data-nosnippet>290</a> .into_iter()
+<a href=#291 id=291 data-nosnippet>291</a> .map(|file|
file.file_path)
+<a href=#292 id=292 data-nosnippet>292</a> .collect();
+<a href=#293 id=293 data-nosnippet>293</a> <span
class="macro">assert_eq!</span>(
+<a href=#294 id=294 data-nosnippet>294</a>
actual_paths_to_apply_for_seq_6,
+<a href=#295 id=295 data-nosnippet>295</a>
delete_file_paths[delete_file_paths.len() - <span class="number">1</span>..]
+<a href=#296 id=296 data-nosnippet>296</a> );
+<a href=#297 id=297 data-nosnippet>297</a>
+<a href=#298 id=298 data-nosnippet>298</a> <span class="comment">// The
2 global equality deletes should match against any partitioned file
+<a href=#299 id=299 data-nosnippet>299</a> </span><span class="kw">let
</span>partitioned_file =
+<a href=#300 id=300 data-nosnippet>300</a>
build_partitioned_data_file(<span
class="kw-2">&</span>Struct::from_iter([<span
class="prelude-val">Some</span>(Literal::long(<span
class="number">100</span>))]), <span class="number">1</span>);
+<a href=#301 id=301 data-nosnippet>301</a>
+<a href=#302 id=302 data-nosnippet>302</a> <span class="kw">let
</span>delete_files_to_apply_for_partitioned_file =
+<a href=#303 id=303 data-nosnippet>303</a>
delete_file_index.get_deletes_for_data_file(<span
class="kw-2">&</span>partitioned_file, <span
class="prelude-val">Some</span>(<span class="number">0</span>));
+<a href=#304 id=304 data-nosnippet>304</a> <span class="kw">let
</span>actual_paths_to_apply_for_partitioned_file: Vec<String> =
+<a href=#305 id=305 data-nosnippet>305</a>
delete_files_to_apply_for_partitioned_file
+<a href=#306 id=306 data-nosnippet>306</a> .into_iter()
+<a href=#307 id=307 data-nosnippet>307</a> .map(|file|
file.file_path)
+<a href=#308 id=308 data-nosnippet>308</a> .collect();
+<a href=#309 id=309 data-nosnippet>309</a> <span
class="macro">assert_eq!</span>(
+<a href=#310 id=310 data-nosnippet>310</a>
actual_paths_to_apply_for_partitioned_file,
+<a href=#311 id=311 data-nosnippet>311</a>
delete_file_paths[..<span class="number">2</span>]
+<a href=#312 id=312 data-nosnippet>312</a> );
+<a href=#313 id=313 data-nosnippet>313</a> }
+<a href=#314 id=314 data-nosnippet>314</a>
+<a href=#315 id=315 data-nosnippet>315</a> <span class="attr">#[test]
+<a href=#316 id=316 data-nosnippet>316</a> </span><span class="kw">fn
</span>test_delete_file_index_partitioned() {
+<a href=#317 id=317 data-nosnippet>317</a> <span class="kw">let
</span>partition_one = Struct::from_iter([<span
class="prelude-val">Some</span>(Literal::long(<span
class="number">100</span>))]);
+<a href=#318 id=318 data-nosnippet>318</a> <span class="kw">let
</span>spec_id = <span class="number">1</span>;
+<a href=#319 id=319 data-nosnippet>319</a> <span class="kw">let
</span>deletes: Vec<ManifestEntry> = <span class="macro">vec!</span>[
+<a href=#320 id=320 data-nosnippet>320</a>
build_added_manifest_entry(<span class="number">4</span>, <span
class="kw-2">&</span>build_partitioned_eq_delete(<span
class="kw-2">&</span>partition_one, spec_id)),
+<a href=#321 id=321 data-nosnippet>321</a>
build_added_manifest_entry(<span class="number">6</span>, <span
class="kw-2">&</span>build_partitioned_eq_delete(<span
class="kw-2">&</span>partition_one, spec_id)),
+<a href=#322 id=322 data-nosnippet>322</a>
build_added_manifest_entry(<span class="number">5</span>, <span
class="kw-2">&</span>build_partitioned_pos_delete(<span
class="kw-2">&</span>partition_one, spec_id)),
+<a href=#323 id=323 data-nosnippet>323</a>
build_added_manifest_entry(<span class="number">6</span>, <span
class="kw-2">&</span>build_partitioned_pos_delete(<span
class="kw-2">&</span>partition_one, spec_id)),
+<a href=#324 id=324 data-nosnippet>324</a> ];
+<a href=#325 id=325 data-nosnippet>325</a>
+<a href=#326 id=326 data-nosnippet>326</a> <span class="kw">let
</span>delete_file_paths: Vec<String> = deletes
+<a href=#327 id=327 data-nosnippet>327</a> .iter()
+<a href=#328 id=328 data-nosnippet>328</a> .map(|file|
file.file_path().to_string())
+<a href=#329 id=329 data-nosnippet>329</a> .collect();
+<a href=#330 id=330 data-nosnippet>330</a>
+<a href=#331 id=331 data-nosnippet>331</a> <span class="kw">let
</span>delete_contexts: Vec<DeleteFileContext> = deletes
+<a href=#332 id=332 data-nosnippet>332</a> .into_iter()
+<a href=#333 id=333 data-nosnippet>333</a> .map(|entry|
DeleteFileContext {
+<a href=#334 id=334 data-nosnippet>334</a> manifest_entry:
entry.into(),
+<a href=#335 id=335 data-nosnippet>335</a> partition_spec_id:
spec_id,
+<a href=#336 id=336 data-nosnippet>336</a> })
+<a href=#337 id=337 data-nosnippet>337</a> .collect();
+<a href=#338 id=338 data-nosnippet>338</a>
+<a href=#339 id=339 data-nosnippet>339</a> <span class="kw">let
</span>delete_file_index = PopulatedDeleteFileIndex::new(delete_contexts);
+<a href=#340 id=340 data-nosnippet>340</a>
+<a href=#341 id=341 data-nosnippet>341</a> <span class="kw">let
</span>partitioned_file =
+<a href=#342 id=342 data-nosnippet>342</a>
build_partitioned_data_file(<span
class="kw-2">&</span>Struct::from_iter([<span
class="prelude-val">Some</span>(Literal::long(<span
class="number">100</span>))]), spec_id);
+<a href=#343 id=343 data-nosnippet>343</a>
+<a href=#344 id=344 data-nosnippet>344</a> <span class="comment">// All
deletes apply to sequence 0
+<a href=#345 id=345 data-nosnippet>345</a> </span><span class="kw">let
</span>delete_files_to_apply_for_seq_0 =
+<a href=#346 id=346 data-nosnippet>346</a>
delete_file_index.get_deletes_for_data_file(<span
class="kw-2">&</span>partitioned_file, <span
class="prelude-val">Some</span>(<span class="number">0</span>));
+<a href=#347 id=347 data-nosnippet>347</a> <span
class="macro">assert_eq!</span>(delete_files_to_apply_for_seq_0.len(), <span
class="number">4</span>);
+<a href=#348 id=348 data-nosnippet>348</a>
+<a href=#349 id=349 data-nosnippet>349</a> <span class="comment">// All
deletes apply to sequence 3
+<a href=#350 id=350 data-nosnippet>350</a> </span><span class="kw">let
</span>delete_files_to_apply_for_seq_3 =
+<a href=#351 id=351 data-nosnippet>351</a>
delete_file_index.get_deletes_for_data_file(<span
class="kw-2">&</span>partitioned_file, <span
class="prelude-val">Some</span>(<span class="number">3</span>));
+<a href=#352 id=352 data-nosnippet>352</a> <span
class="macro">assert_eq!</span>(delete_files_to_apply_for_seq_3.len(), <span
class="number">4</span>);
+<a href=#353 id=353 data-nosnippet>353</a>
+<a href=#354 id=354 data-nosnippet>354</a> <span class="comment">//
Last 3 deletes apply to sequence 4
+<a href=#355 id=355 data-nosnippet>355</a> </span><span class="kw">let
</span>delete_files_to_apply_for_seq_4 =
+<a href=#356 id=356 data-nosnippet>356</a>
delete_file_index.get_deletes_for_data_file(<span
class="kw-2">&</span>partitioned_file, <span
class="prelude-val">Some</span>(<span class="number">4</span>));
+<a href=#357 id=357 data-nosnippet>357</a> <span class="kw">let
</span>actual_paths_to_apply_for_seq_4: Vec<String> =
delete_files_to_apply_for_seq_4
+<a href=#358 id=358 data-nosnippet>358</a> .into_iter()
+<a href=#359 id=359 data-nosnippet>359</a> .map(|file|
file.file_path)
+<a href=#360 id=360 data-nosnippet>360</a> .collect();
+<a href=#361 id=361 data-nosnippet>361</a>
+<a href=#362 id=362 data-nosnippet>362</a> <span
class="macro">assert_eq!</span>(
+<a href=#363 id=363 data-nosnippet>363</a>
actual_paths_to_apply_for_seq_4,
+<a href=#364 id=364 data-nosnippet>364</a>
delete_file_paths[delete_file_paths.len() - <span class="number">3</span>..]
+<a href=#365 id=365 data-nosnippet>365</a> );
+<a href=#366 id=366 data-nosnippet>366</a>
+<a href=#367 id=367 data-nosnippet>367</a> <span class="comment">//
Last 3 deletes apply to sequence 5
+<a href=#368 id=368 data-nosnippet>368</a> </span><span class="kw">let
</span>delete_files_to_apply_for_seq_5 =
+<a href=#369 id=369 data-nosnippet>369</a>
delete_file_index.get_deletes_for_data_file(<span
class="kw-2">&</span>partitioned_file, <span
class="prelude-val">Some</span>(<span class="number">5</span>));
+<a href=#370 id=370 data-nosnippet>370</a> <span class="kw">let
</span>actual_paths_to_apply_for_seq_5: Vec<String> =
delete_files_to_apply_for_seq_5
+<a href=#371 id=371 data-nosnippet>371</a> .into_iter()
+<a href=#372 id=372 data-nosnippet>372</a> .map(|file|
file.file_path)
+<a href=#373 id=373 data-nosnippet>373</a> .collect();
+<a href=#374 id=374 data-nosnippet>374</a> <span
class="macro">assert_eq!</span>(
+<a href=#375 id=375 data-nosnippet>375</a>
actual_paths_to_apply_for_seq_5,
+<a href=#376 id=376 data-nosnippet>376</a>
delete_file_paths[delete_file_paths.len() - <span class="number">3</span>..]
+<a href=#377 id=377 data-nosnippet>377</a> );
+<a href=#378 id=378 data-nosnippet>378</a>
+<a href=#379 id=379 data-nosnippet>379</a> <span class="comment">//
Only the last position delete applies to sequence 6
+<a href=#380 id=380 data-nosnippet>380</a> </span><span class="kw">let
</span>delete_files_to_apply_for_seq_6 =
+<a href=#381 id=381 data-nosnippet>381</a>
delete_file_index.get_deletes_for_data_file(<span
class="kw-2">&</span>partitioned_file, <span
class="prelude-val">Some</span>(<span class="number">6</span>));
+<a href=#382 id=382 data-nosnippet>382</a> <span class="kw">let
</span>actual_paths_to_apply_for_seq_6: Vec<String> =
delete_files_to_apply_for_seq_6
+<a href=#383 id=383 data-nosnippet>383</a> .into_iter()
+<a href=#384 id=384 data-nosnippet>384</a> .map(|file|
file.file_path)
+<a href=#385 id=385 data-nosnippet>385</a> .collect();
+<a href=#386 id=386 data-nosnippet>386</a> <span
class="macro">assert_eq!</span>(
+<a href=#387 id=387 data-nosnippet>387</a>
actual_paths_to_apply_for_seq_6,
+<a href=#388 id=388 data-nosnippet>388</a>
delete_file_paths[delete_file_paths.len() - <span class="number">1</span>..]
+<a href=#389 id=389 data-nosnippet>389</a> );
+<a href=#390 id=390 data-nosnippet>390</a>
+<a href=#391 id=391 data-nosnippet>391</a> <span class="comment">//
Data file with different partition tuples does not match any delete files
+<a href=#392 id=392 data-nosnippet>392</a> </span><span class="kw">let
</span>partitioned_second_file =
+<a href=#393 id=393 data-nosnippet>393</a>
build_partitioned_data_file(<span
class="kw-2">&</span>Struct::from_iter([<span
class="prelude-val">Some</span>(Literal::long(<span
class="number">200</span>))]), <span class="number">1</span>);
+<a href=#394 id=394 data-nosnippet>394</a> <span class="kw">let
</span>delete_files_to_apply_for_different_partition =
+<a href=#395 id=395 data-nosnippet>395</a>
delete_file_index.get_deletes_for_data_file(<span
class="kw-2">&</span>partitioned_second_file, <span
class="prelude-val">Some</span>(<span class="number">0</span>));
+<a href=#396 id=396 data-nosnippet>396</a> <span class="kw">let
</span>actual_paths_to_apply_for_different_partition: Vec<String> =
+<a href=#397 id=397 data-nosnippet>397</a>
delete_files_to_apply_for_different_partition
+<a href=#398 id=398 data-nosnippet>398</a> .into_iter()
+<a href=#399 id=399 data-nosnippet>399</a> .map(|file|
file.file_path)
+<a href=#400 id=400 data-nosnippet>400</a> .collect();
+<a href=#401 id=401 data-nosnippet>401</a> <span
class="macro">assert!</span>(actual_paths_to_apply_for_different_partition.is_empty());
+<a href=#402 id=402 data-nosnippet>402</a>
+<a href=#403 id=403 data-nosnippet>403</a> <span class="comment">//
Data file with same tuple but different spec ID does not match any delete files
+<a href=#404 id=404 data-nosnippet>404</a> </span><span class="kw">let
</span>partitioned_different_spec = build_partitioned_data_file(<span
class="kw-2">&</span>partition_one, <span class="number">2</span>);
+<a href=#405 id=405 data-nosnippet>405</a> <span class="kw">let
</span>delete_files_to_apply_for_different_spec =
+<a href=#406 id=406 data-nosnippet>406</a>
delete_file_index.get_deletes_for_data_file(<span
class="kw-2">&</span>partitioned_different_spec, <span
class="prelude-val">Some</span>(<span class="number">0</span>));
+<a href=#407 id=407 data-nosnippet>407</a> <span class="kw">let
</span>actual_paths_to_apply_for_different_spec: Vec<String> =
+<a href=#408 id=408 data-nosnippet>408</a>
delete_files_to_apply_for_different_spec
+<a href=#409 id=409 data-nosnippet>409</a> .into_iter()
+<a href=#410 id=410 data-nosnippet>410</a> .map(|file|
file.file_path)
+<a href=#411 id=411 data-nosnippet>411</a> .collect();
+<a href=#412 id=412 data-nosnippet>412</a> <span
class="macro">assert!</span>(actual_paths_to_apply_for_different_spec.is_empty());
+<a href=#413 id=413 data-nosnippet>413</a> }
+<a href=#414 id=414 data-nosnippet>414</a>
+<a href=#415 id=415 data-nosnippet>415</a> <span class="kw">fn
</span>build_unpartitioned_eq_delete() -> DataFile {
+<a href=#416 id=416 data-nosnippet>416</a>
build_partitioned_eq_delete(<span class="kw-2">&</span>Struct::empty(),
<span class="number">0</span>)
+<a href=#417 id=417 data-nosnippet>417</a> }
+<a href=#418 id=418 data-nosnippet>418</a>
+<a href=#419 id=419 data-nosnippet>419</a> <span class="kw">fn
</span>build_partitioned_eq_delete(partition: <span
class="kw-2">&</span>Struct, spec_id: i32) -> DataFile {
+<a href=#420 id=420 data-nosnippet>420</a> DataFileBuilder::default()
+<a href=#421 id=421 data-nosnippet>421</a> .file_path(<span
class="macro">format!</span>(<span
class="string">"{}_equality_delete.parquet"</span>, Uuid::new_v4()))
+<a href=#422 id=422 data-nosnippet>422</a>
.file_format(DataFileFormat::Parquet)
+<a href=#423 id=423 data-nosnippet>423</a>
.content(DataContentType::EqualityDeletes)
+<a href=#424 id=424 data-nosnippet>424</a> .equality_ids(<span
class="prelude-val">Some</span>(<span class="macro">vec!</span>[<span
class="number">1</span>]))
+<a href=#425 id=425 data-nosnippet>425</a> .record_count(<span
class="number">1</span>)
+<a href=#426 id=426 data-nosnippet>426</a>
.partition(partition.clone())
+<a href=#427 id=427 data-nosnippet>427</a>
.partition_spec_id(spec_id)
+<a href=#428 id=428 data-nosnippet>428</a>
.file_size_in_bytes(<span class="number">100</span>)
+<a href=#429 id=429 data-nosnippet>429</a> .build()
+<a href=#430 id=430 data-nosnippet>430</a> .unwrap()
+<a href=#431 id=431 data-nosnippet>431</a> }
+<a href=#432 id=432 data-nosnippet>432</a>
+<a href=#433 id=433 data-nosnippet>433</a> <span class="kw">fn
</span>build_unpartitioned_pos_delete() -> DataFile {
+<a href=#434 id=434 data-nosnippet>434</a>
build_partitioned_pos_delete(<span class="kw-2">&</span>Struct::empty(),
<span class="number">0</span>)
+<a href=#435 id=435 data-nosnippet>435</a> }
+<a href=#436 id=436 data-nosnippet>436</a>
+<a href=#437 id=437 data-nosnippet>437</a> <span class="kw">fn
</span>build_partitioned_pos_delete(partition: <span
class="kw-2">&</span>Struct, spec_id: i32) -> DataFile {
+<a href=#438 id=438 data-nosnippet>438</a> DataFileBuilder::default()
+<a href=#439 id=439 data-nosnippet>439</a> .file_path(<span
class="macro">format!</span>(<span
class="string">"{}-pos-delete.parquet"</span>, Uuid::new_v4()))
+<a href=#440 id=440 data-nosnippet>440</a>
.file_format(DataFileFormat::Parquet)
+<a href=#441 id=441 data-nosnippet>441</a>
.content(DataContentType::PositionDeletes)
+<a href=#442 id=442 data-nosnippet>442</a> .record_count(<span
class="number">1</span>)
+<a href=#443 id=443 data-nosnippet>443</a>
.referenced_data_file(<span class="prelude-val">Some</span>(<span
class="string">"/some-data-file.parquet"</span>.to_string()))
+<a href=#444 id=444 data-nosnippet>444</a>
.partition(partition.clone())
+<a href=#445 id=445 data-nosnippet>445</a>
.partition_spec_id(spec_id)
+<a href=#446 id=446 data-nosnippet>446</a>
.file_size_in_bytes(<span class="number">100</span>)
+<a href=#447 id=447 data-nosnippet>447</a> .build()
+<a href=#448 id=448 data-nosnippet>448</a> .unwrap()
+<a href=#449 id=449 data-nosnippet>449</a> }
+<a href=#450 id=450 data-nosnippet>450</a>
+<a href=#451 id=451 data-nosnippet>451</a> <span class="kw">fn
</span>build_unpartitioned_data_file() -> DataFile {
+<a href=#452 id=452 data-nosnippet>452</a> DataFileBuilder::default()
+<a href=#453 id=453 data-nosnippet>453</a> .file_path(<span
class="macro">format!</span>(<span class="string">"{}-data.parquet"</span>,
Uuid::new_v4()))
+<a href=#454 id=454 data-nosnippet>454</a>
.file_format(DataFileFormat::Parquet)
+<a href=#455 id=455 data-nosnippet>455</a>
.content(DataContentType::Data)
+<a href=#456 id=456 data-nosnippet>456</a> .record_count(<span
class="number">100</span>)
+<a href=#457 id=457 data-nosnippet>457</a>
.partition(Struct::empty())
+<a href=#458 id=458 data-nosnippet>458</a> .partition_spec_id(<span
class="number">0</span>)
+<a href=#459 id=459 data-nosnippet>459</a>
.file_size_in_bytes(<span class="number">100</span>)
+<a href=#460 id=460 data-nosnippet>460</a> .build()
+<a href=#461 id=461 data-nosnippet>461</a> .unwrap()
+<a href=#462 id=462 data-nosnippet>462</a> }
+<a href=#463 id=463 data-nosnippet>463</a>
+<a href=#464 id=464 data-nosnippet>464</a> <span class="kw">fn
</span>build_partitioned_data_file(partition_value: <span
class="kw-2">&</span>Struct, spec_id: i32) -> DataFile {
+<a href=#465 id=465 data-nosnippet>465</a> DataFileBuilder::default()
+<a href=#466 id=466 data-nosnippet>466</a> .file_path(<span
class="macro">format!</span>(<span class="string">"{}-data.parquet"</span>,
Uuid::new_v4()))
+<a href=#467 id=467 data-nosnippet>467</a>
.file_format(DataFileFormat::Parquet)
+<a href=#468 id=468 data-nosnippet>468</a>
.content(DataContentType::Data)
+<a href=#469 id=469 data-nosnippet>469</a> .record_count(<span
class="number">100</span>)
+<a href=#470 id=470 data-nosnippet>470</a>
.partition(partition_value.clone())
+<a href=#471 id=471 data-nosnippet>471</a>
.partition_spec_id(spec_id)
+<a href=#472 id=472 data-nosnippet>472</a>
.file_size_in_bytes(<span class="number">100</span>)
+<a href=#473 id=473 data-nosnippet>473</a> .build()
+<a href=#474 id=474 data-nosnippet>474</a> .unwrap()
+<a href=#475 id=475 data-nosnippet>475</a> }
+<a href=#476 id=476 data-nosnippet>476</a>
+<a href=#477 id=477 data-nosnippet>477</a> <span class="kw">fn
</span>build_added_manifest_entry(data_seq_number: i64, file: <span
class="kw-2">&</span>DataFile) -> ManifestEntry {
+<a href=#478 id=478 data-nosnippet>478</a> ManifestEntry::builder()
+<a href=#479 id=479 data-nosnippet>479</a>
.status(ManifestStatus::Added)
+<a href=#480 id=480 data-nosnippet>480</a>
.sequence_number(data_seq_number)
+<a href=#481 id=481 data-nosnippet>481</a> .data_file(file.clone())
+<a href=#482 id=482 data-nosnippet>482</a> .build()
+<a href=#483 id=483 data-nosnippet>483</a> }
+<a href=#484 id=484
data-nosnippet>484</a>}</code></pre></div></section></main></body></html>
\ No newline at end of file