* @license GPLv3 (or any later version)
 */

namespace Pressbooks\Modules\Import\Epub;

use Pressbooks\Book;
use Pressbooks\HtmlParser;
use Pressbooks\Modules\Import\ImportGenerator;
use Pressbooks\Utility\PercentageYield;

/**
 * Import generator for EPUB archives: walks the OPF manifest and spine of the
 * zip and sideloads referenced images through media_handle_sideload().
 */
class Epub201 extends ImportGenerator {

	const TYPE_OF = 'epub';

	/**
	 * Reference to the object that represents the Epub zip folder
	 *
	 * @var \ZipArchive
	 */
	protected $zip;

	/**
	 * OPF Basedir
	 *
	 * @var string
	 */
	protected $basedir = '';

	/**
	 * String for authors, contributors
	 *
	 * @var string
	 */
	protected $authors;

	/**
	 * If Pressbooks generated the epub file
	 *
	 * @var bool
	 */
	protected $isPbEpub = false;

	/**
	 * Manifest items with type application/xhtml+xml, keyed by manifest id.
	 * Each entry is [ 'type' => string, 'href' => string ].
	 *
	 * @var array
	 */
	protected $manifest = [];

	/**
	 * Loads the WordPress admin media includes when media_handle_sideload() is not
	 * defined yet, then creates the (still unopened) zip handle.
	 */
	function __construct() {
		if ( ! function_exists( 'media_handle_sideload' ) ) {
			require_once( ABSPATH . 'wp-admin/includes/image.php' );
			require_once( ABSPATH . 'wp-admin/includes/file.php' );
			require_once( ABSPATH . 'wp-admin/includes/media.php' );
		}

		$this->zip = new \ZipArchive;
	}

	/**
	 * Opens the uploaded EPUB and collects its chapters in spine order.
	 *
	 * @param array $upload reads the 'file' and 'type' keys and the optional 'url' key
	 *
	 * @return bool false when setCurrentZip() throws
	 */
	function setCurrentImportOption( array $upload ) {

		try {
			$this->setCurrentZip( $upload['file'] );
		} catch ( \Exception $e ) {
			return false;
		}

		$option = [
			'file' => $upload['file'],
			'url' => $upload['url'] ?? null,
			'file_type' => $upload['type'],
			'type_of' => self::TYPE_OF,
			'chapters' => [],
		];

		$xml = $this->getOpf();

		//Format manifest to array
		$this->parseManifestToArray( $xml );

		//Iterate each spine and get each manifest item in the order of spine
		foreach ( $xml->spine->children() as $item ) {
			/** @var \SimpleXMLElement $item */
			// Get attributes
			$id = '';
			foreach ( $item->attributes() as $key => $val ) {
				if ( 'idref' === $key ) {
					$id = (string) $val;
				}
			}

			//Check this manifest item exists or not
			if ( isset( $this->manifest[ $id ] ) ) {
				$href = (string) $this->manifest[ $id ]['href'];

				//Check manifest item is copyright or not
				if ( 'OEBPS/copyright.html' === $href ) {
					$this->pbCheck( $href );
				}

				// Set
				// Extract title from file
				$html = $this->getZipContent( $this->basedir . $href, false );
				$matches = [];
				// NOTE(review): the file looks damaged from this point up to the next
				// docblock. The statement below and the closing braces after it do not
				// belong to this method ($result is never defined in it), so code is
				// presumably missing here. The fragment is kept verbatim; restore the
				// original from version control.
				preg_match( '/(?:
)/isU', '
', $result ); } return $result; }

	/**
	 * Is it an EPUB generated by PB?
	 *
	 * Sets $this->isPbEpub to true when the copyright page either links to
	 * pressbooks.com or carries the 'copyright-page' / 'ugc' markup. The flag is
	 * never reset to false here.
	 *
	 * @param string $copyright_file
	 *
	 * @return void
	 * @see createCopyright() in /export/epub/class-pb-epub201.php
	 */
	protected function pbCheck( $copyright_file ) {
		$result = $this->getZipContent( $copyright_file );

		foreach ( $result->body->div->div->p as $node ) {
			// Cast to string so a paragraph without a link yields '' instead of null,
			// and compare against false so a match at offset 0 still counts.
			$link = (string) $node->a['href'];
			if ( false !== strpos( $link, 'pressbooks.com' ) ) {
				$this->isPbEpub = true;
			}
		}

		// applies to PB generated EPUBs with PB_SECRET_SAUCE
		// @see createCopyright() in export/epub/class-pb-epub201.php
		// The attributes are SimpleXMLElement objects: without the string casts the
		// strict comparisons can never be true.
		$outer_id = (string) $result->body->div[0]->attributes()->id[0];
		$inner_class = (string) $result->body->div->div->attributes()->class[0];
		if ( 'copyright-page' === $outer_id && 'ugc' === $inner_class ) {
			$this->isPbEpub = true;
		}
	}

	/**
	 * Parse HTML snippet, save all found <img> tags using media_handle_sideload(), return the HTML with changed paths.
	 *
	 * Images that could not be imported keep their old src with '#fixme' appended.
	 *
	 * @param \DOMDocument $doc
	 * @param string $href original filename, with (relative) path
	 *
	 * @return \DOMDocument the same document instance, modified in place
	 */
	protected function scrapeAndKneadImages( \DOMDocument $doc, $href ) {

		$images = $doc->getElementsByTagName( 'img' );

		foreach ( $images as $image ) {
			/** @var \DOMElement $image */
			// Fetch image, change src
			$old_src = $image->getAttribute( 'src' );
			$new_src = $this->fetchAndSaveUniqueImage( $old_src, $href );
			if ( $new_src ) {
				// Replace with new image
				$image->setAttribute( 'src', $new_src );
			} else {
				// Tag broken image
				$image->setAttribute( 'src', "{$old_src}#fixme" );
			}
		}

		return $doc;
	}

	/**
	 * Extract url from zip and load into WP using media_handle_sideload()
	 * Will return an empty string if something went wrong.
	 *
	 * Results (including failures) are cached per image location for the rest of
	 * the request, so each image is sideloaded at most once.
	 *
	 * @param string $url image path as written in the src attribute
	 * @param string $href original filename, with (relative) path
	 *
	 * @see media_handle_sideload
	 *
	 * @return string attachment URL, or '' when the image could not be imported
	 */
	protected function fetchAndSaveUniqueImage( $url, $href ) {

		$path_parts = pathinfo( $href );
		$dir = ( isset( $path_parts['dirname'] ) ) ? $path_parts['dirname'] : '';
		// Compare against '' rather than testing truthiness: a directory named "0" is valid.
		$img_location = ( '' !== $dir ? "$dir/$url" : $url );

		// Cheap cache
		static $already_done = [];
		if ( isset( $already_done[ $img_location ] ) ) {
			return $already_done[ $img_location ];
		}

		/* Process */

		// Basename without query string
		$filename = explode( '?', basename( $url ) );
		$filename = array_shift( $filename );
		$filename = sanitize_file_name( urldecode( $filename ) );

		if ( ! preg_match( '/\.(jpe?g|gif|png)$/i', $filename ) ) {
			// Unsupported image type
			$already_done[ $img_location ] = '';
			return '';
		}

		// Look the image up under the same location that keys the cache, so an
		// empty directory does not produce a leading-slash path.
		$image_content = $this->getZipContent( $img_location, false );

		if ( ! $image_content ) {
			// Could not find image?
			try {
				// case where $url is '../Images/someimage.jpg'
				$trim_url = ltrim( $url, './' );
				$image_content = $this->getZipContent( $this->basedir . $trim_url, false );
				if ( ! $image_content ) {
					throw new \Exception( 'Could not import images from EPUB' );
				}
			} catch ( \Exception $e ) {
				$already_done[ $img_location ] = '';
				return '';
			}
		}

		$tmp_name = $this->createTmpFile();
		\Pressbooks\Utility\put_contents( $tmp_name, $image_content );

		if ( ! \Pressbooks\Image\is_valid_image( $tmp_name, $filename ) ) {

			try { // changing the file name so that extension matches the mime type
				$filename = $this->properImageExtension( $tmp_name, $filename );

				if ( ! \Pressbooks\Image\is_valid_image( $tmp_name, $filename ) ) {
					throw new \Exception( 'Image is corrupt, and file extension matches the mime type' );
				}
			} catch ( \Exception $exc ) {
				// Garbage, Don't import
				$already_done[ $img_location ] = '';
				return '';
			}
		}

		$pid = media_handle_sideload(
			[
				'name' => $filename,
				'tmp_name' => $tmp_name,
			], 0
		);

		// media_handle_sideload() returns a WP_Error on failure; do not hand that
		// object to wp_get_attachment_url(), which expects an attachment ID.
		$src = is_wp_error( $pid ) ? '' : wp_get_attachment_url( $pid );
		if ( ! $src ) {
			$src = ''; // Change false to empty string
		}
		$already_done[ $img_location ] = $src;

		return $src;
	}

	/**
	 * Change hrefs
	 *
	 * Currently a no-op: the document is returned untouched.
	 *
	 * @param \DOMDocument $doc
	 * @param string $type front-matter, part, chapter, back-matter, ...
	 * @param string $href original filename, with (relative) path
	 *
	 * @return \DOMDocument
	 */
	protected function kneadHref( \DOMDocument $doc, $type, $href ) {
		// TODO: Fix self-referencing URLs
		return $doc;
	}

	/**
	 * Parse manifest with type 'application/xhtml+xml' to array
	 *
	 * Fills $this->manifest, keyed by the item's id attribute; items of any other
	 * media type are skipped. All stored values are plain strings.
	 *
	 * @param \SimpleXMLElement $xml OPF document; its <manifest> children are read
	 */
	protected function parseManifestToArray( \SimpleXMLElement $xml ) {

		foreach ( $xml->manifest->children() as $item ) {
			/** @var \SimpleXMLElement $item */
			// A missing attribute casts to '', matching the previous defaults.
			$id = (string) $item['id'];
			$type = (string) $item['media-type'];

			// Skip
			if ( 'application/xhtml+xml' !== $type ) {
				continue;
			}

			$this->manifest[ $id ] = [
				'type' => $type,
				// Cast like id/type so no SimpleXMLElement is kept in the array.
				'href' => (string) $item['href'],
			];
		}
	}
}