diff --git a/SOURCES/python-lxml-iterparse.patch b/SOURCES/python-lxml-iterparse.patch new file mode 100644 index 00000000..34516f9e --- /dev/null +++ b/SOURCES/python-lxml-iterparse.patch @@ -0,0 +1,15 @@ +diff -up lxml-3.2.1/src/lxml/iterparse.pxi.orig lxml-3.2.1/src/lxml/iterparse.pxi +--- lxml-3.2.1/src/lxml/iterparse.pxi.orig 2013-03-29 21:50:04.000000000 +0100 ++++ lxml-3.2.1/src/lxml/iterparse.pxi 2013-07-11 17:33:02.859572207 +0200 +@@ -457,7 +457,10 @@ cdef class iterparse(_BaseParser): + return context + + cdef _close_source(self): +- if self._source is None or not self._close_source_after_read: ++ if self._source is None: ++ return ++ if not self._close_source_after_read: ++ self._source = None + return + try: + close = self._source.close diff --git a/SPECS/python-lxml.spec b/SPECS/python-lxml.spec new file mode 100644 index 00000000..478c5e90 --- /dev/null +++ b/SPECS/python-lxml.spec @@ -0,0 +1,1202 @@ +%global with_python3 1 + +Name: python-lxml +Version: 3.2.1 +Release: 4%{?dist} +Summary: ElementTree-like Python bindings for libxml2 and libxslt + +Group: Development/Libraries +License: BSD +URL: http://lxml.de +Source0: http://lxml.de/files/lxml-%{version}.tgz +Source1: http://lxml.de/files/lxml-%{version}.tgz.asc + +Patch0: python-lxml-iterparse.patch + +BuildRequires: libxslt-devel + +BuildRequires: python-devel +BuildRequires: python-setuptools +BuildRequires: Cython >= 0.17.1 + +%if 0%{?with_python3} +BuildRequires: python3-devel +BuildRequires: python3-setuptools +%endif + +%description +lxml provides a Python binding to the libxslt and libxml2 libraries. +It follows the ElementTree API as much as possible in order to provide +a more Pythonic interface to libxml2 and libxslt than the default +bindings. In particular, lxml deals with Python Unicode strings +rather than encoded UTF-8 and handles memory management automatically, +unlike the default bindings. 
+ +%package docs +Summary: Documentation for %{name} +Group: Documentation +BuildArch: noarch +%description docs +This package provides the documentation for %{name}, e.g. the API as html. + + +%if 0%{?with_python3} +%package -n python3-lxml +Summary: ElementTree-like Python 3 bindings for libxml2 and libxslt +Group: Development/Libraries + +%description -n python3-lxml +lxml provides a Python 3 binding to the libxslt and libxml2 libraries. +It follows the ElementTree API as much as possible in order to provide +a more Pythonic interface to libxml2 and libxslt than the default +bindings. In particular, lxml deals with Python 3 Unicode strings +rather than encoded UTF-8 and handles memory management automatically, +unlike the default bindings. +%endif + +%prep +%setup -q -n lxml-%{version} + +# spurious XMLSyntaxError after finishing iterparse() (bug #874546) +%patch0 -p1 -b .close_source + +# remove the C extension so that it will be rebuilt using the latest Cython +rm -f src/lxml/lxml.etree.c +rm -f src/lxml/lxml.etree.h +rm -f src/lxml/lxml.etree_api.h +rm -f src/lxml/lxml.objectify.c + +chmod a-x doc/rest2html.py +%{__sed} -i 's/\r//' doc/s5/ui/default/print.css \ + doc/s5/ep2008/atom.rng \ + doc/s5/ui/default/iepngfix.htc + +%if 0%{?with_python3} +rm -rf %{py3dir} +cp -r . 
%{py3dir} +%endif + +%build +CFLAGS="%{optflags}" %{__python} setup.py build --with-cython + +%if 0%{?with_python3} +cp src/lxml/lxml.etree.c %{py3dir}/src/lxml +cp src/lxml/lxml.etree.h %{py3dir}/src/lxml +cp src/lxml/lxml.etree_api.h %{py3dir}/src/lxml +cp src/lxml/lxml.objectify.c %{py3dir}/src/lxml + +pushd %{py3dir} +CFLAGS="%{optflags}" %{__python3} setup.py build --with-cython +popd +%endif + +%install +%{__python} setup.py install --skip-build --no-compile --with-cython --root %{buildroot} + +%if 0%{?with_python3} +pushd %{py3dir} +%{__python3} setup.py install --skip-build --no-compile --with-cython --root %{buildroot} +popd +%endif + +%files +%doc LICENSES.txt PKG-INFO CREDITS.txt CHANGES.txt +%{python_sitearch}/lxml +%{python_sitearch}/lxml-*.egg-info + +%files docs +%doc doc/* + +%if 0%{?with_python3} +%files -n python3-lxml +%doc LICENSES.txt PKG-INFO CREDITS.txt CHANGES.txt +%{python3_sitearch}/lxml-*.egg-info +%{python3_sitearch}/lxml +%endif + +%changelog +* Fri Jan 24 2014 Daniel Mach - 3.2.1-4 +- Mass rebuild 2014-01-24 + +* Fri Dec 27 2013 Daniel Mach - 3.2.1-3 +- Mass rebuild 2013-12-27 + +* Thu Jul 11 2013 Jiri Popelka - 3.2.1-2 +- spurious XMLSyntaxError after finishing iterparse() (bug #874546) + +* Sun May 12 2013 Jeffrey Ollie - 3.2.1-1 +- 3.2.1 (2013-05-11) +- ================== +- +- Features added +- -------------- +- +- * The methods ``apply_templates()`` and ``process_children()`` of XSLT +- extension elements have gained two new boolean options ``elements_only`` +- and ``remove_blank_text`` that discard either all strings or +- whitespace-only strings from the result list. +- +- Bugs fixed +- ---------- +- +- * When moving Elements to another tree, the namespace cleanup mechanism +- no longer drops namespace prefixes from attributes for which it finds +- a default namespace declaration, to prevent them from appearing as +- unnamespaced attributes after serialisation. 
+- +- * Returning non-type objects from a custom class lookup method could lead +- to a crash. +- +- * Instantiating and using subtypes of Comments and ProcessingInstructions +- crashed. + +* Fri May 10 2013 Jeffrey Ollie - 3.2.0-1 +- 3.2.0 (2013-04-28) +- ================== +- +- Features added +- -------------- +- +- Bugs fixed +- ---------- +- +- * LP#690319: Leading whitespace could change the behaviour of the string +- parsing functions in ``lxml.html``. +- +- * LP#599318: The string parsing functions in ``lxml.html`` are more robust +- in the face of uncommon HTML content like framesets or missing body tags. +- Patch by Stefan Seelmann. +- +- * LP#712941: I/O errors while trying to access files with paths that +- contain non-ASCII characters could raise ``UnicodeDecodeError`` instead +- of properly reporting the ``IOError``. +- +- * LP#673205: Parsing from in-memory strings disabled network access in the +- default parser and made subsequent attempts to parse from a URL fail. +- +- * LP#971754: lxml.html.clean appends 'nofollow' to 'rel' attributes instead +- of overwriting the current value. +- +- * LP#715687: lxml.html.clean no longer discards scripts that are explicitly +- allowed by the user provided whitelist. Patch by Christine Koppelt. +- +- 3.1.2 (2013-04-12) +- ================== +- +- Bugs fixed +- ---------- +- +- * LP#1136509: Passing attributes through the namespace-unaware API of +- the sax bridge (i.e. the ``handler.startElement()`` method) failed +- with a ``TypeError``. Patch by Mike Bayer. +- +- * LP#1123074: Fix serialisation error in XSLT output when converting +- the result tree to a Unicode string. +- +- * GH#105: Replace illegal usage of ``xmlBufLength()`` in libxml2 2.9.0 +- by properly exported API function ``xmlBufUse()``. 
+- +- 3.1.1 (2013-03-29) +- ================== +- +- Features added +- -------------- +- +- Bugs fixed +- ---------- +- +- * LP#1160386: Write access to ``lxml.html.FormElement.fields`` raised +- an AttributeError in Py3. +- +- * Illegal memory access during cleanup in incremental xmlfile writer. +- +- Other changes +- ------------- +- +- * The externally useless class ``lxml.etree._BaseParser`` was removed +- from the module dict. + +* Fri Mar 8 2013 Jeffrey Ollie - 3.1.0-1 +- 3.1.0 (2013-02-10) +- ================== +- +- Features added +- -------------- +- +- * GH#89: lxml.html.clean allows overriding the set of attributes that it +- considers 'safe'. Patch by Francis Devereux. +- +- Bugs fixed +- ---------- +- +- * LP#1104370: ``copy.copy(el.attrib)`` raised an exception. It now returns +- a copy of the attributes as a plain Python dict. +- +- * GH#95: When used with namespace prefixes, the ``el.find*()`` methods +- always used the first namespace mapping that was provided for each +- path expression instead of using the one that was actually passed +- in for the current run. +- +- * LP#1092521, GH#91: Fix undefined C symbol in Python runtimes compiled +- without threading support. Patch by Ulrich Seidl. +- +- Other changes +- ------------- +- +- +- 3.1beta1 (2012-12-21) +- ===================== +- +- Features added +- -------------- +- +- * New build-time option ``--with-unicode-strings`` for Python 2 that +- makes the API always return Unicode strings for names and text +- instead of byte strings for plain ASCII content. +- +- * New incremental XML file writing API ``etree.xmlfile()``. +- +- * E factory in lxml.objectify is callable to simplify the creation of +- tags with non-identifier names without having to resort to getattr(). 
+- +- Bugs fixed +- ---------- +- +- * When starting from a non-namespaced element in lxml.objectify, searching +- for a child without explicitly specifying a namespace incorrectly found +- namespaced elements with the requested local name, instead of restricting +- the search to non-namespaced children. +- +- * GH#85: Deprecation warnings were fixed for Python 3.x. +- +- * GH#33: lxml.html.fromstring() failed to accept bytes input in Py3. +- +- * LP#1080792: Static build of libxml2 2.9.0 failed due to missing file. +- +- Other changes +- ------------- +- +- * The externally useless class ``_ObjectifyElementMakerCaller`` was +- removed from the module API of lxml.objectify. +- +- * LP#1075622: lxml.builder is faster for adding text to elements with +- many children. Patch by Anders Hammarquist. + +* Thu Feb 14 2013 Fedora Release Engineering - 3.0.1-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild + +* Mon Oct 15 2012 Jeffrey Ollie - 3.0.1-1 +- 3.0.1 (2012-10-14) +- Bugs fixed +- +- * LP#1065924: Element proxies could disappear during garbage collection +- in PyPy without proper cleanup. +- * GH#71: Failure to work with libxml2 2.6.x. +- * LP#1065139: static MacOS-X build failed in Py3. + +* Wed Oct 10 2012 Jeffrey Ollie - 3.0-1 +- 3.0 (2012-10-08) +- ================ +- +- Features added +- -------------- +- +- Bugs fixed +- ---------- +- +- * End-of-file handling was incorrect in iterparse() when reading from +- a low-level C file stream and failed in libxml2 2.9.0 due to its +- improved consistency checks. +- +- Other changes +- ------------- +- +- * The build no longer uses Cython by default unless the generated C files +- are missing. To use Cython, pass the option "--with-cython". To ignore +- the fatal build error when Cython is required but not available (e.g. to +- run special setup.py commands that do not actually run a build), pass +- "--without-cython". 
+- +- +- 3.0beta1 (2012-09-26) +- ===================== +- +- Features added +- -------------- +- +- * Python level access to (optional) libxml2 memory debugging features +- to simplify debugging of memory leaks etc. +- +- Bugs fixed +- ---------- +- +- * Fix a memory leak in XPath by switching to Cython 0.17.1. +- +- * Some tests were adapted to work with PyPy. +- +- Other changes +- ------------- +- +- * The code was adapted to work with the upcoming libxml2 2.9.0 release. +- +- +- 3.0alpha2 (2012-08-23) +- ====================== +- +- Features added +- -------------- +- +- * The .iter() method of elements now accepts tag arguments like "{*}name" +- to search for elements with a given local name in any namespace. With +- this addition, all combinations of wildcards now work as expected: +- "{ns}name", "{}name", "{*}name", "{ns}*", "{}*" and "{*}*". Note that +- "name" is equivalent to "{}name", but "*" is "{*}*". The same change +- applies to the .getiterator(), .itersiblings(), .iterancestors(), +- .iterdescendants(), .iterchildren() and .itertext() methods, the +- strip_attributes(), strip_elements() and strip_tags() functions as well +- as the iterparse() function. +- +- * C14N allows specifying the inclusive prefixes to be promoted to +- top-level during exclusive serialisation. +- +- Bugs fixed +- ---------- +- +- * Passing long Unicode strings into the feed() parser interface failed to +- read the entire string. +- +- Other changes +- ------------- +- +- +- 3.0alpha1 (2012-07-31) +- ====================== +- +- Features added +- -------------- +- +- * Initial support for building in PyPy (through cpyext). +- +- * DTD objects gained an API that allows read access to their +- declarations. +- +- * xpathgrep.py gained support for parsing line-by-line (e.g. +- from grep output) and for surrounding the output with a new root +- tag. 
+- +- * E-factory in lxml.builder accepts subtypes of known data +- types (such as string subtypes) when building elements around them. +- +- * Tree iteration and iterparse() with a selective tag +- argument supports passing a set of tags. Tree nodes will be +- returned by the iterators if they match any of the tags. +- +- Bugs fixed +- ---------- +- +- * The .find*() methods in lxml.objectify no longer use XPath +- internally, which makes them faster in many cases (especially when +- short circuiting after a single or couple of elements) and fixes +- some behavioural differences compared to lxml.etree. Note that +- this means that they no longer support arbitrary XPath expressions +- but only the subset that the ElementPath language supports. +- The previous implementation was also redundant with the normal +- XPath support, which can be used as a replacement. +- +- * el.find('*') could accidentally return a comment or processing +- instruction that happened to be in the wrong spot. (Same for the +- other .find*() methods.) +- +- * The error logging is less intrusive and avoids a global setup where +- possible. +- +- * Fixed undefined names in html5lib parser. +- +- * xpathgrep.py did not work in Python 3. +- +- * Element.attrib.update() did not accept an attrib of +- another Element as parameter. +- +- * For subtypes of ElementBase that make the .text or .tail +- properties immutable (as in objectify, for example), inserting text +- when creating Elements through the E-Factory feature of the class +- constructor would fail with an exception, stating that the text +- cannot be modified. +- +- Other changes +- -------------- +- +- * The code base was overhauled to properly use 'const' where the API +- of libxml2 and libxslt requests it. This also has an impact on the +- public C-API of lxml itself, as defined in etreepublic.pxd, as +- well as the provided declarations in the lxml/includes/ directory. +- Code that uses these declarations may have to be adapted. 
On the +- plus side, this fixes several C compiler warnings, also for user +- code, thus making it easier to spot real problems again. +- +- * The functionality of "lxml.cssselect" was moved into a separate PyPI +- package called "cssselect". To continue using it, you must install +- that package separately. The "lxml.cssselect" module is still +- available and provides the same interface, provided the "cssselect" +- package can be imported at runtime. +- +- * Element attributes passed in as an attrib dict or as keyword +- arguments are now sorted by (namespaced) name before being created +- to make their order predictable for serialisation and iteration. +- Note that adding or deleting attributes afterwards does not take +- that order into account, i.e. setting a new attribute appends it +- after the existing ones. +- +- * Several classes that are for internal use only were removed +- from the lxml.etree module dict: +- _InputDocument, _ResolverRegistry, _ResolverContext, _BaseContext, +- _ExsltRegExp, _IterparseContext, _TempStore, _ExceptionContext, +- __ContentOnlyElement, _AttribIterator, _NamespaceRegistry, +- _ClassNamespaceRegistry, _FunctionNamespaceRegistry, +- _XPathFunctionNamespaceRegistry, _ParserDictionaryContext, +- _FileReaderContext, _ParserContext, _PythonSaxParserTarget, +- _TargetParserContext, _ReadOnlyProxy, _ReadOnlyPIProxy, +- _ReadOnlyEntityProxy, _ReadOnlyElementProxy, _OpaqueNodeWrapper, +- _OpaqueDocumentWrapper, _ModifyContentOnlyProxy, +- _ModifyContentOnlyPIProxy, _ModifyContentOnlyEntityProxy, +- _AppendOnlyElementProxy, _SaxParserContext, _FilelikeWriter, +- _ParserSchemaValidationContext, _XPathContext, +- _XSLTResolverContext, _XSLTContext, _XSLTQuotedStringParam +- +- * Several internal classes can no longer be inherited from: +- _InputDocument, _ResolverRegistry, _ExsltRegExp, _ElementUnicodeResult, +- _IterparseContext, _TempStore, _AttribIterator, _ClassNamespaceRegistry, +- _XPathFunctionNamespaceRegistry, 
_ParserDictionaryContext, +- _FileReaderContext, _PythonSaxParserTarget, _TargetParserContext, +- _ReadOnlyPIProxy, _ReadOnlyEntityProxy, _OpaqueDocumentWrapper, +- _ModifyContentOnlyPIProxy, _ModifyContentOnlyEntityProxy, +- _AppendOnlyElementProxy, _FilelikeWriter, _ParserSchemaValidationContext, +- _XPathContext, _XSLTResolverContext, _XSLTContext, +- _XSLTQuotedStringParam, _XSLTResultTree, _XSLTProcessingInstruction + +* Thu Sep 27 2012 Jeffrey Ollie - 2.3.5-1 +- Bugs fixed +- +- * Crash when merging text nodes in element.remove(). +- * Crash in sax/target parser when reporting empty doctype. + +* Thu Sep 27 2012 Jeffrey Ollie - 2.3.4-1 +- Bugs fixed +- +- * Crash when building an nsmap (Element property) with empty namespace +- URIs. +- * Crash due to race condition when errors (or user messages) occur during +- threaded XSLT processing (or compilation). +- * XSLT stylesheet compilation could ignore compilation errors. + +* Sat Aug 04 2012 David Malcolm - 2.3.3-4 +- rebuild for https://fedoraproject.org/wiki/Features/Python_3.3 + +* Fri Aug 3 2012 David Malcolm - 2.3.3-3 +- remove rhel logic from with_python3 conditional + +* Sat Jul 21 2012 Fedora Release Engineering - 2.3.3-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild + +* Thu Jan 5 2012 Jeffrey C. Ollie - 2.3.3-1 +- 2.3.3 (2012-01-04) +- Features added +- +- * lxml.html.tostring() gained new serialisation options with_tail and +- doctype. +- +- Bugs fixed +- +- * Fixed a crash when using iterparse() for HTML parsing and requesting +- start events. +- * Fixed parsing of more selectors in cssselect. Whitespace before pseudo- +- elements and pseudo-classes is significant as it is a descendant +- combinator. "E :pseudo" should parse the same as "E *:pseudo", not +- "E:pseudo". Patch by Simon Sapin. +- * lxml.html.diff no longer raises an exception when hitting 'img' tags +- without 'src' attribute. + +* Mon Nov 14 2011 Jeffrey C. 
Ollie - 2.3.2-1 +- 2.3.2 (2011-11-11) +- Features added +- +- * lxml.objectify.deannotate() has a new boolean option +- cleanup_namespaces to remove the objectify namespace declarations +- (and generally clean up the namespace declarations) after removing +- the type annotations. +- * lxml.objectify gained its own SubElement() function as a copy of +- etree.SubElement to avoid an otherwise redundant import of +- lxml.etree on the user side. +- +- Bugs fixed +- +- * Fixed the "descendant" bug in cssselect a second time (after a first +- fix in lxml 2.3.1). The previous change resulted in a serious +- performance regression for the XPath based evaluation of the +- translated expression. Note that this breaks the usage of some +- of the generated XPath expressions as XSLT location paths that +- previously worked in 2.3.1. +- * Fixed parsing of some selectors in cssselect. Whitespace after +- combinators ">", "+" and "~" is now correctly ignored. Previously +- it was parsed as a descendant combinator. For example, "div> .foo" +- was parsed the same as "div>* .foo" instead of "div>.foo". Patch by +- Simon Sapin. + +* Sun Sep 25 2011 Jeffrey C. Ollie - 2.3.1-1 +- Features added +- -------------- +- +- * New option kill_tags in lxml.html.clean to remove specific +- tags and their content (i.e. their whole subtree). +- +- * pi.get() and pi.attrib on processing instructions to parse +- pseudo-attributes from the text content of processing instructions. +- +- * lxml.get_include() returns a list of include paths that can be +- used to compile external C code against lxml.etree. This is +- specifically required for statically linked lxml builds when code +- needs to compile against the exact same header file versions as lxml +- itself. +- +- * Resolver.resolve_file() takes an additional option +- close_file that configures if the file(-like) object will be +- closed after reading or not. 
By default, the file will be closed, +- as the user is not expected to keep a reference to it. +- +- Bugs fixed +- ---------- +- +- * HTML cleaning didn't remove 'data:' links. +- +- * The html5lib parser integration now uses the 'official' +- implementation in html5lib itself, which makes it work with newer +- releases of the library. +- +- * In lxml.sax, endElementNS() could incorrectly reject a plain +- tag name when the corresponding start event inferred the same plain +- tag name to be in the default namespace. +- +- * When an open file-like object is passed into parse() or +- iterparse(), the parser will no longer close it after use. This +- reverts a change in lxml 2.3 where all files would be closed. It is +- the user's responsibility to properly close the file(-like) object, +- also in error cases. +- +- * Assertion error in lxml.html.cleaner when discarding top-level elements. +- +- * In lxml.cssselect, use the xpath 'A//B' (short for +- 'A/descendant-or-self::node()/B') instead of 'A/descendant::B' for the +- css descendant selector ('A B'). This makes a few edge cases +- consistent with the selector behavior in WebKit and Firefox, and makes +- more css expressions valid location paths (for use in xsl:template +- match). +- +- * In lxml.html, non-selected