Skip to content

Commit 9648e97

Browse files
committed
Turn WP_Remote_File_Ranged_Reader into a proper byte source
1 parent 9036dc0 commit 9648e97

File tree

6 files changed

+252
-149
lines changed

6 files changed

+252
-149
lines changed

src/WordPress/AsyncHttp/Client.php

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -696,7 +696,15 @@ private function handle_redirects( $requests ) {
696696
}
697697

698698
$this->events[ $request->id ][ Client::EVENT_REDIRECT ] = true;
699-
$this->enqueue( new Request( $redirect_url, [ 'redirected_from' => $request ] ) );
699+
$this->enqueue(
700+
new Request(
701+
$redirect_url,
702+
[
703+
...$request->get_request_info(),
704+
'redirected_from' => $request,
705+
]
706+
)
707+
);
700708
}
701709
}
702710

src/WordPress/AsyncHttp/Request.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,16 @@ public function __construct( string $url, $request_info = array() ) {
6262
}
6363
}
6464

65+
/**
 * Exports this request's settings as an associative options array.
 *
 * The keys are `http_version`, `method`, `headers`, `body_stream` and
 * `redirected_from`, each populated from the matching property of this
 * request. NOTE(review): presumably this mirrors the `$request_info`
 * array accepted by the constructor, so a request can be cloned for a
 * different URL — confirm against the constructor.
 *
 * @return array
 */
public function get_request_info() {
	$request_info = array();

	$request_info['http_version']    = $this->http_version;
	$request_info['method']          = $this->method;
	$request_info['headers']         = $this->headers;
	$request_info['body_stream']     = $this->upload_body_stream;
	$request_info['redirected_from'] = $this->redirected_from;

	return $request_info;
}
74+
6575
public function latest_redirect() {
6676
$request = $this;
6777
while ( $request->redirected_to ) {

src/WordPress/ByteReader/WP_Byte_Reader.php

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,19 @@
55
/**
66
 * Experimental abstract base class for streaming, seekable byte readers.
77
*/
8-
interface WP_Byte_Reader {
9-
public function length(): int;
10-
public function tell(): int;
11-
public function seek( int $offset ): bool;
12-
public function is_finished(): bool;
13-
public function next_bytes(): bool;
14-
public function get_bytes(): ?string;
15-
public function get_last_error(): ?string;
8+
abstract class WP_Byte_Reader {

	/**
	 * Total length of the underlying byte source.
	 *
	 * Deliberately declared without a return type so that implementations
	 * may narrow it or signal an unknown length in their own way.
	 */
	abstract public function length();

	/** Current offset within the byte source. */
	abstract public function tell(): int;

	/** Moves the reader to $offset; returns whether the seek succeeded. */
	abstract public function seek( int $offset ): bool;

	/** Whether the reader has been fully consumed. */
	abstract public function is_finished(): bool;

	/** Advances to the next chunk; returns false when no chunk is available. */
	abstract public function next_bytes(): bool;

	/** The chunk made available by the last successful next_bytes() call, if any. */
	abstract public function get_bytes(): ?string;

	/** Description of the last error, or null when there was none. */
	abstract public function get_last_error(): ?string;

	/**
	 * Drains the reader from its current position and returns everything
	 * it produced as a single string.
	 *
	 * Keeps calling next_bytes() until it returns false and concatenates
	 * every chunk reported by get_bytes(). Errors are not inspected here;
	 * callers that care should check get_last_error() afterwards.
	 *
	 * @return string
	 */
	public function read_all(): string {
		$chunks = array();
		while ( $this->next_bytes() ) {
			// A null chunk contributes nothing, exactly like appending null to a string.
			$chunks[] = (string) $this->get_bytes();
		}

		return implode( '', $chunks );
	}
}

src/WordPress/ByteReader/WP_File_Reader.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
namespace WordPress\ByteReader;
44

5-
class WP_File_Reader implements WP_Byte_Reader {
5+
class WP_File_Reader extends WP_Byte_Reader {
66

77
const STATE_STREAMING = '#streaming';
88
const STATE_FINISHED = '#finished';
@@ -33,7 +33,7 @@ private function __construct( $file_path, $chunk_size ) {
3333
$this->chunk_size = $chunk_size;
3434
}
3535

36-
public function length(): int {
36+
/**
 * Returns the size of the underlying file in bytes.
 *
 * @return int|null The file size, or null when it cannot be determined
 *                  (for example when the file is missing or unreadable).
 */
public function length(): ?int {
	// Drop any cached stat entry so a file that changed since the last
	// stat call reports its current size.
	clearstatcache( true, $this->file_path );

	$size = filesize( $this->file_path );

	// filesize() signals failure with false, which is not a valid ?int:
	// it would be coerced to 0 (or throw under strict_types).
	return false === $size ? null : $size;
}
3939

src/WordPress/ByteReader/WP_Remote_File_Ranged_Reader.php

Lines changed: 141 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -20,175 +20,186 @@
2020
* var_dump($file->get_bytes());
2121
* }
2222
*
23-
* @TODO: Verify that the remote server supports range requests.
24-
* @TODO: Support requesting multiple ranges in a single request.
2523
* @TODO: Abort in-progress requests when seeking to a new offset.
2624
*/
27-
class WP_Remote_File_Ranged_Reader {
25+
class WP_Remote_File_Ranged_Reader extends WP_Byte_Reader {

	/** URL of the remote file. */
	private $url;

	/**
	 * Cached total length of the remote file.
	 *
	 * null  – not resolved yet,
	 * false – resolved, but the remote reader did not report a length,
	 * other – the value reported by the remote reader.
	 */
	private $remote_file_length;

	/** Reader for the current request, or null when the next read must start a new request. */
	private $current_reader;

	/** Offset in the remote file up to which chunks have been accounted for. */
	private $offset_in_remote_file = 0;

	private $default_expected_chunk_size = 10 * 1024; // 10 KB
	private $expected_chunk_size         = 10 * 1024; // 10 KB

	/** When true, next_bytes() does not request a follow-up range once the current one ends. */
	private $stop_after_chunk = false;

	/**
	 * Creates a seekable reader for the remote file.
	 *
	 * Detects support for range requests and falls back to saving the entire
	 * file to disk when the remote server does not support them.
	 *
	 * @param string $url URL of the remote file.
	 * @return WP_Byte_Reader|false A reader positioned at the start of the file,
	 *                              or false when the download to disk failed.
	 */
	public static function create( $url ) {
		$remote_file_reader = new self( $url );

		/**
		 * We don't **need** the content-length header to be present.
		 *
		 * However, since this reader is currently only used to read remote
		 * ZIP files, we do need to know the length of the file to be able
		 * to read the central directory index.
		 *
		 * Let's revisit this check once we need to read other types of
		 * files.
		 */
		if ( false === $remote_file_reader->length() ) {
			return self::save_to_disk( $url );
		}

		/**
		 * Try to read the first two bytes of the file to confirm that
		 * the remote server supports range requests.
		 */
		$remote_file_reader->seek_to_chunk( 0, 2 );
		if ( false === $remote_file_reader->next_bytes() ) {
			return self::save_to_disk( $url );
		}

		$bytes = $remote_file_reader->get_bytes();
		if ( 2 !== strlen( (string) $bytes ) ) {
			// We asked for two bytes and got something else, so the server
			// is most likely streaming the entire file. Redirect the output
			// to a local file and hand the caller a regular file reader.
			return self::redirect_output_to_disk( $remote_file_reader );
		}

		// The remote server supports range requests, good! We can use this reader.
		// Return to the beginning of the file before handing it out.
		$remote_file_reader->seek( 0 );

		return $remote_file_reader;
	}

	/**
	 * Downloads the entire remote file to a local temporary file.
	 *
	 * @param string $url URL of the remote file.
	 * @return WP_Byte_Reader|false Reader for the local copy, or false on failure.
	 */
	private static function save_to_disk( $url ) {
		return self::redirect_output_to_disk( new WP_Remote_File_Reader( $url ) );
	}

	/**
	 * Drains $reader into a local temporary file and returns a reader for that file.
	 *
	 * The chunk currently exposed by $reader (if any) is written first, followed
	 * by every chunk produced by subsequent next_bytes() calls.
	 *
	 * @param WP_Byte_Reader $reader Source to drain.
	 * @return WP_Byte_Reader|false Reader for the local copy, or false when the
	 *                              temporary file could not be created or written,
	 *                              or when $reader reported an error.
	 */
	private static function redirect_output_to_disk( WP_Byte_Reader $reader ) {
		$temp_path = tempnam( sys_get_temp_dir(), 'wp-remote-file-reader-' );
		if ( false === $temp_path ) {
			return false;
		}

		// tempnam() has already created $temp_path on disk. Move it to the
		// suffixed name instead of leaving an orphaned empty file behind.
		// NOTE(review): the ".epub" suffix looks specific to one use case — confirm.
		$file_path = $temp_path . '.epub';
		if ( ! rename( $temp_path, $file_path ) ) {
			unlink( $temp_path );
			return false;
		}

		$file = fopen( $file_path, 'w' );
		if ( false === $file ) {
			unlink( $file_path );
			return false;
		}

		$write_ok = true;

		// We may have a bytes chunk available at this point. Compare against
		// the empty string rather than relying on truthiness so that a chunk
		// consisting of "0" is not dropped.
		$pending = $reader->get_bytes();
		if ( is_string( $pending ) && '' !== $pending ) {
			$write_ok = false !== fwrite( $file, $pending );
		}

		// Keep streaming the file until we're done.
		while ( $write_ok && $reader->next_bytes() ) {
			$write_ok = false !== fwrite( $file, (string) $reader->get_bytes() );
		}
		fclose( $file );

		if ( ! $write_ok || $reader->get_last_error() ) {
			// @TODO: How should we log this error?
			// Don't leave a truncated download behind.
			unlink( $file_path );
			return false;
		}

		return WP_File_Reader::create( $file_path );
	}

	/**
	 * @param string $url URL of the remote file.
	 */
	public function __construct( $url ) {
		$this->url = $url;
	}

	/**
	 * Advances to the next chunk of the remote file.
	 *
	 * Requests a new range whenever the current one is exhausted, unless
	 * seek_to_chunk() asked for a single chunk only.
	 *
	 * @return bool True when a new chunk is available via get_bytes(), false
	 *              at the end of the file, at the end of a single-chunk read,
	 *              when the file length is unknown, or when a request failed
	 *              or produced no data.
	 */
	public function next_bytes(): bool {
		while ( true ) {
			$reader_is_new = false;
			if ( null === $this->current_reader ) {
				$this->create_reader();
				$reader_is_new = true;
			}

			// Advance the offset by the length of the current chunk.
			$chunk = $this->current_reader->get_bytes();
			if ( is_string( $chunk ) ) {
				$this->offset_in_remote_file += strlen( $chunk );
			}

			// We've reached the end of the remote file, we're done. The offset
			// counts consumed bytes, so the file is exhausted only once it
			// reaches the full length (not length - 1, which would drop the
			// final byte).
			$length = $this->length();
			if ( false === $length || $this->offset_in_remote_file >= $length ) {
				return false;
			}

			// We've got a chunk, return it.
			if ( $this->current_reader->next_bytes() ) {
				return true;
			}

			// The current range is exhausted.
			if ( $this->stop_after_chunk ) {
				return false;
			}

			// Don't re-request forever: stop when the request failed (keeping
			// the reader so get_last_error() can report it), or when a
			// brand-new request produced no data at all.
			if ( $this->current_reader->get_last_error() || $reader_is_new ) {
				return false;
			}

			// Request the next range on the following iteration.
			$this->current_reader = null;
		}
	}

	/**
	 * Total length of the remote file.
	 *
	 * @return int|false The length reported by the remote reader, or false
	 *                   when it is not available.
	 */
	public function length() {
		return $this->ensure_content_length();
	}

	/**
	 * Starts a new range request at the current offset, spanning
	 * $expected_chunk_size bytes.
	 */
	private function create_reader() {
		$first_byte = $this->offset_in_remote_file;
		$last_byte  = $this->offset_in_remote_file + $this->expected_chunk_size - 1;

		// Servers without range support are detected up front in create();
		// a reader constructed directly does not perform that check.
		$this->current_reader = new WP_Remote_File_Reader(
			$this->url,
			array(
				'headers' => array(
					'Range' => 'bytes=' . $first_byte . '-' . $last_byte,
				),
			)
		);
	}

	/**
	 * Positions the reader at $offset and limits the next read to a single
	 * range of $length bytes.
	 *
	 * Goes through seek() so the offset is updated and any existing reader
	 * is discarded; only then does the next read issue a range request with
	 * the new chunk size.
	 *
	 * @param int $offset Offset in the remote file.
	 * @param int $length Number of bytes to request.
	 */
	public function seek_to_chunk( $offset, $length ) {
		$this->seek( $offset );
		$this->expected_chunk_size = $length;
		$this->stop_after_chunk    = true;
	}

	/**
	 * Moves to $offset in the remote file and restores the default chunk size.
	 *
	 * @param int $offset Offset in the remote file.
	 * @return bool Always true.
	 */
	public function seek( $offset ): bool {
		$this->offset_in_remote_file = $offset;
		// @TODO cancel any pending requests
		$this->current_reader      = null;
		$this->expected_chunk_size = $this->default_expected_chunk_size;
		$this->stop_after_chunk    = false;

		return true;
	}

	/**
	 * @return int The offset in the remote file up to which chunks have been
	 *             accounted for.
	 */
	public function tell(): int {
		return $this->offset_in_remote_file;
	}

	/**
	 * NOTE(review): always reports false, even after the end of the file has
	 * been reached — confirm whether callers rely on this.
	 */
	public function is_finished(): bool {
		return false;
	}

	/**
	 * @return string|null The current chunk, or null when no request is
	 *                     active (e.g. right after seek()).
	 */
	public function get_bytes(): ?string {
		if ( null === $this->current_reader ) {
			return null;
		}

		return $this->current_reader->get_bytes();
	}

	/**
	 * @return string|null The last error of the current request, or null
	 *                     when no request is active.
	 */
	public function get_last_error(): ?string {
		// @TODO: Preserve the error information when the current reader
		//        is reset.
		if ( null === $this->current_reader ) {
			return null;
		}

		return $this->current_reader->get_last_error();
	}

	/**
	 * Resolves the remote file length once and caches the outcome.
	 *
	 * A dedicated, non-ranged request is used for the probe: the current
	 * reader may be a range request, whose reported length would describe
	 * the requested range rather than the whole file.
	 *
	 * @TODO: Abort the probe request once the length is known.
	 *
	 * @return int|false The cached length, or false when it is unknown.
	 */
	private function ensure_content_length() {
		if ( null === $this->remote_file_length ) {
			$probe  = new WP_Remote_File_Reader( $this->url );
			$length = $probe->length();

			// Store false (not null) for "unknown" so we don't probe again.
			$this->remote_file_length = ( null === $length || false === $length ) ? false : $length;
		}

		return $this->remote_file_length;
	}
}

0 commit comments

Comments
 (0)